diff --git a/examples/mcp_servers/echo/pyproject.toml b/examples/mcp_servers/echo/pyproject.toml index dc1bc362..19ba8e50 100644 --- a/examples/mcp_servers/echo/pyproject.toml +++ b/examples/mcp_servers/echo/pyproject.toml @@ -4,12 +4,12 @@ version = "0.1.0" description = "MCP Server created with Arcade.dev" requires-python = ">=3.10" dependencies = [ - "arcade-mcp-server>=1.5.0,<2.0.0", + "arcade-mcp-server>=1.16.0,<2.0.0", ] [project.optional-dependencies] dev = [ - "arcade-mcp[all]>=1.4.0,<2.0.0", + "arcade-mcp[all]>=1.10.0,<2.0.0", ] [build-system] diff --git a/examples/mcp_servers/echo/src/echo/server.py b/examples/mcp_servers/echo/src/echo/server.py index 778d9101..29613822 100644 --- a/examples/mcp_servers/echo/src/echo/server.py +++ b/examples/mcp_servers/echo/src/echo/server.py @@ -1,11 +1,26 @@ from typing import Annotated from arcade_mcp_server import MCPApp +from arcade_mcp_server.metadata import ( + Behavior, + Operation, + ToolMetadata, +) app = MCPApp("EchoServer") -@app.tool +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.READ], + read_only=True, + destructive=False, + idempotent=True, + open_world=False, + ), + ), +) def echo(message: Annotated[str, "The message to echo"]) -> str: """Echo a message back to the caller.""" return message diff --git a/examples/mcp_servers/tool_metadata/README.md b/examples/mcp_servers/tool_metadata/README.md new file mode 100644 index 00000000..97701b78 --- /dev/null +++ b/examples/mcp_servers/tool_metadata/README.md @@ -0,0 +1,84 @@ +# Tool Metadata Example + +This example demonstrates how to use **tool metadata** to describe your tools' classification, behavior, and custom properties. + +## What is Tool Metadata? 
+ +Tool metadata provides structured information about what a tool does: + +| Field | Purpose | Used For | +|-------|---------|----------| +| **Classification** | What type of service the tool interfaces with | Tool discovery & selection boosting | +| **Behavior** | What effects the tool has | Policy decisions, MCP annotations | +| **Extras** | Arbitrary key/values | Custom logic (routing, rate limits, etc.) | + +## Classification + +Describes *what type of service* the tool interfaces with. + +```python +classification=Classification( + service_domains=[ServiceDomain.EMAIL], # What type of service? +) +``` + +**Service Domains** (what type of service): `EMAIL`, `CRM`, `MESSAGING`, `DOCUMENTS`, `CLOUD_STORAGE`, `SOURCE_CODE`, `PAYMENTS`, `SOCIAL_MEDIA`, etc. + +For tools with no external service (`open_world=False`), classification is `None`. + +## Behavior + +Describes the tool's *effects* and maps to MCP annotations. + +```python +behavior=Behavior( + operations=[Operation.CREATE], # What effect? READ, CREATE, UPDATE, DELETE, OPAQUE + read_only=False, # Does it only read data? + destructive=False, # Can it cause irreversible data loss? + idempotent=True, # Are repeated calls safe? + open_world=False, # Does it interact with external systems? +) +``` + +These values become MCP `annotations` that clients like Claude can use to make informed decisions. + +## Extras + +Arbitrary key/values for custom logic that *don't* affect tool selection. + +```python +extras={ + "billing_tier": "free", + "max_requests_per_minute": 100, + "data_classification": "internal", +} +``` + +Use extras for: IDP routing, feature flags, rate limiting hints, compliance metadata. 
+ +## Running the Example + +```bash +cd examples/mcp_servers/tool_metadata + +# Install dependencies +uv sync + +# Run with stdio transport +uv run src/tool_metadata/server.py stdio + +# Or run with HTTP transport +uv run src/tool_metadata/server.py http +``` + +## Tools in This Example + +| Tool | Operations | Behavior | Notes | +|------|------------|----------|-------| +| `reverse_text` | READ | read_only, idempotent | Pure computation | +| `search_notes` | READ | read_only, idempotent | Query data | +| `create_note` | CREATE | not idempotent | Creates new data | +| `update_note` | UPDATE | idempotent | Modifies existing data | +| `delete_note` | DELETE | destructive, idempotent | Removes data permanently | +| `get_notes_stats` | READ | read_only, idempotent | Has `extras` for custom metadata | +| `upsert_note` | CREATE, UPDATE | idempotent | Multi-operation compound action | diff --git a/examples/mcp_servers/tool_metadata/pyproject.toml b/examples/mcp_servers/tool_metadata/pyproject.toml new file mode 100644 index 00000000..5b664ab5 --- /dev/null +++ b/examples/mcp_servers/tool_metadata/pyproject.toml @@ -0,0 +1,44 @@ +[project] +name = "tool_metadata" +version = "0.1.0" +description = "Example MCP Server demonstrating tool metadata (classification, behavior, extras)" +requires-python = ">=3.10" +dependencies = [ + "arcade-mcp-server>=1.17.0,<2.0.0", +] + +[project.optional-dependencies] +dev = [ + "arcade-mcp[all]>=1.10.0,<2.0.0", + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", + "mypy>=1.0.0", + "ruff>=0.1.0", +] + +# Tell Arcade.dev that this package has Arcade tools +[project.entry-points.arcade_toolkits] +toolkit_name = "tool_metadata" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/tool_metadata"] + +[tool.ruff] +line-length = 100 +target-version = "py312" + +[tool.mypy] +python_version = "3.12" +warn_unused_configs = true +disallow_untyped_defs = false + +# # Uncomment the following 
if you are developing inside of the arcade-mcp repo & want to use editable mode +# # Otherwise, you will install the following packages from PyPI +# [tool.uv.sources] +# arcade-mcp = { path = "../../../", editable = true } +# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/tool_metadata/src/tool_metadata/.env.example b/examples/mcp_servers/tool_metadata/src/tool_metadata/.env.example new file mode 100644 index 00000000..fe5a7446 --- /dev/null +++ b/examples/mcp_servers/tool_metadata/src/tool_metadata/.env.example @@ -0,0 +1 @@ +MY_SECRET_KEY="Your tools can have secrets injected at runtime!" diff --git a/examples/mcp_servers/tool_metadata/src/tool_metadata/__init__.py b/examples/mcp_servers/tool_metadata/src/tool_metadata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/mcp_servers/tool_metadata/src/tool_metadata/server.py b/examples/mcp_servers/tool_metadata/src/tool_metadata/server.py new file mode 100644 index 00000000..93a744d3 --- /dev/null +++ b/examples/mcp_servers/tool_metadata/src/tool_metadata/server.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 +""" +Tool Metadata Example MCP Server + +This example demonstrates how to use tool metadata to describe your tools' +classification, behavior, and custom properties. 
Tool metadata helps with: + +- Tool discovery and selection (classification) +- Policy decisions and MCP annotations (behavior) +- Custom logic like routing or feature flags (extras) +""" + +import sys +from typing import Annotated + +from arcade_mcp_server import MCPApp +from arcade_mcp_server.metadata import ( + Behavior, + Operation, + ToolMetadata, +) + +app = MCPApp(name="ToolMetadataDemo", version="1.0.0", log_level="DEBUG") + +# In-memory storage for demo purposes +_notes: dict[str, str] = {} + + +# ============================================================================= +# Example 1: Pure computation tool (read-only, no external service) +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.READ], + read_only=True, + destructive=False, + idempotent=True, + open_world=False, # No external systems + ), + ), +) +def reverse_text(text: Annotated[str, "The text to reverse"]) -> str: + """Reverse the characters in a string. 
A pure computation with no side effects.""" + return text[::-1] + + +# ============================================================================= +# Example 2: Read-only search tool +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.READ], + read_only=True, + destructive=False, + idempotent=True, + open_world=False, + ), + ), +) +def search_notes( + query: Annotated[str, "Search term to find in note titles and content"], +) -> list[dict[str, str]]: + """Search through stored notes by title or content.""" + query_lower = query.lower() + results = [] + for title, content in _notes.items(): + if query_lower in title.lower() or query_lower in content.lower(): + results.append({"title": title, "content": content}) + return results + + +# ============================================================================= +# Example 3: Create tool (mutating, not destructive) +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.CREATE], + read_only=False, + destructive=False, # Creating is not destructive + idempotent=False, # Creating twice may have different effects + open_world=False, + ), + ), +) +def create_note( + title: Annotated[str, "The title of the note"], + content: Annotated[str, "The content of the note"], +) -> dict[str, str]: + """Create a new note. Fails if a note with the same title already exists.""" + if title in _notes: + return {"error": f"Note '{title}' already exists. 
Use update_note instead."} + _notes[title] = content + return {"status": "created", "title": title} + + +# ============================================================================= +# Example 4: Update tool (mutating, idempotent) +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.UPDATE], + read_only=False, + destructive=False, + idempotent=True, # Updating with same content is idempotent + open_world=False, + ), + ), +) +def update_note( + title: Annotated[str, "The title of the note to update"], + content: Annotated[str, "The new content for the note"], +) -> dict[str, str]: + """Update an existing note's content.""" + if title not in _notes: + return {"error": f"Note '{title}' not found. Use create_note first."} + _notes[title] = content + return {"status": "updated", "title": title} + + +# ============================================================================= +# Example 5: Delete tool (destructive!) +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.DELETE], + read_only=False, + destructive=True, # Deletion is destructive - data is lost + idempotent=True, # Deleting twice has same effect as once + open_world=False, + ), + ), +) +def delete_note( + title: Annotated[str, "The title of the note to delete"], +) -> dict[str, str]: + """Permanently delete a note. 
This action cannot be undone.""" + if title not in _notes: + return {"error": f"Note '{title}' not found."} + del _notes[title] + return {"status": "deleted", "title": title} + + +# ============================================================================= +# Example 6: Tool with extras for custom logic +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.READ], + read_only=True, + destructive=False, + idempotent=True, + open_world=False, + ), + # Extras: arbitrary key/values for custom logic + # These don't affect tool selection, but can be used for: + # - Routing decisions (e.g., which IDP to use) + # - Feature flags + # - Rate limiting + # - Governance/compliance metadata + extras={ + "billing_tier": "free", # Feature flag for billing + "max_requests_per_minute": 100, # Rate limiting hint + "data_classification": "internal", # Compliance metadata + "cache_ttl_seconds": 60, # Caching hint + }, + ), +) +def get_notes_stats() -> dict[str, int]: + """Get statistics about stored notes. 
Demonstrates the 'extras' field.""" + total_notes = len(_notes) + total_chars = sum(len(content) for content in _notes.values()) + return { + "total_notes": total_notes, + "total_characters": total_chars, + "average_length": total_chars // total_notes if total_notes > 0 else 0, + } + + +# ============================================================================= +# Example 7: Multi-operation tool (upsert = CREATE + UPDATE) +# ============================================================================= +@app.tool( + metadata=ToolMetadata( + behavior=Behavior( + operations=[ + Operation.CREATE, + Operation.UPDATE, + ], # Multiple operations for compound actions + read_only=False, + destructive=False, + idempotent=True, # Upsert is idempotent + open_world=False, + ), + ), +) +def upsert_note( + title: Annotated[str, "The title of the note"], + content: Annotated[str, "The content of the note"], +) -> dict[str, str]: + """Create or update a note. If the note exists, it will be updated.""" + action = "updated" if title in _notes else "created" + _notes[title] = content + return {"status": action, "title": title} + + +# Run with specific transport +if __name__ == "__main__": + transport = sys.argv[1] if len(sys.argv) > 1 else "stdio" + app.run(transport=transport, host="127.0.0.1", port=8000) diff --git a/libs/arcade-cli/arcade_cli/new.py b/libs/arcade-cli/arcade_cli/new.py index fba90f87..9da414ec 100644 --- a/libs/arcade-cli/arcade_cli/new.py +++ b/libs/arcade-cli/arcade_cli/new.py @@ -19,14 +19,14 @@ try: ARCADE_MCP_MAX_VERSION = str(int(ARCADE_MCP_MIN_VERSION.split(".")[0]) + 1) + ".0.0" except Exception as e: console.print(f"[red]Failed to get arcade-mcp version: {e}[/red]") - ARCADE_MCP_MIN_VERSION = "1.5.8" # Default version if unable to fetch + ARCADE_MCP_MIN_VERSION = "1.10.0" # Default version if unable to fetch ARCADE_MCP_MAX_VERSION = "2.0.0" -ARCADE_TDK_MIN_VERSION = "3.2.2" +ARCADE_TDK_MIN_VERSION = "3.6.0" ARCADE_TDK_MAX_VERSION = "4.0.0" 
ARCADE_SERVE_MIN_VERSION = "3.1.5" ARCADE_SERVE_MAX_VERSION = "4.0.0" -ARCADE_MCP_SERVER_MIN_VERSION = "1.11.1" +ARCADE_MCP_SERVER_MIN_VERSION = "1.17.0" ARCADE_MCP_SERVER_MAX_VERSION = "2.0.0" diff --git a/libs/arcade-cli/arcade_cli/templates/minimal/{{ toolkit_name }}/src/{{ toolkit_name }}/server.py b/libs/arcade-cli/arcade_cli/templates/minimal/{{ toolkit_name }}/src/{{ toolkit_name }}/server.py index 0417dfaa..23b64fe4 100644 --- a/libs/arcade-cli/arcade_cli/templates/minimal/{{ toolkit_name }}/src/{{ toolkit_name }}/server.py +++ b/libs/arcade-cli/arcade_cli/templates/minimal/{{ toolkit_name }}/src/{{ toolkit_name }}/server.py @@ -7,6 +7,13 @@ from typing import Annotated import httpx from arcade_mcp_server import Context, MCPApp from arcade_mcp_server.auth import Reddit +from arcade_mcp_server.metadata import ( + Behavior, + Classification, + Operation, + ServiceDomain, + ToolMetadata, +) app = MCPApp(name="{{ toolkit_name }}", version="1.0.0", log_level="DEBUG") @@ -33,7 +40,21 @@ def whisper_secret(context: Context) -> Annotated[str, "The last 4 characters of # To use this tool locally, you need to install the Arcade CLI (uv tool install arcade-mcp) # and then run 'arcade login' to authenticate. 
-@app.tool(requires_auth=Reddit(scopes=["read"])) +@app.tool( + requires_auth=Reddit(scopes=["read"]), + metadata=ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.SOCIAL_MEDIA], + ), + behavior=Behavior( + operations=[Operation.READ], + read_only=True, + destructive=False, + idempotent=True, + open_world=True, + ), + ), +) async def get_posts_in_subreddit( context: Context, subreddit: Annotated[str, "The name of the subreddit"] ) -> dict: diff --git a/libs/arcade-core/arcade_core/catalog.py b/libs/arcade-core/arcade_core/catalog.py index 82f9893f..6275a949 100644 --- a/libs/arcade-core/arcade_core/catalog.py +++ b/libs/arcade-core/arcade_core/catalog.py @@ -33,6 +33,7 @@ from arcade_core.errors import ( ToolkitLoadError, ToolOutputSchemaError, ) +from arcade_core.metadata import ToolMetadata from arcade_core.schema import ( TOOL_NAME_SEPARATOR, FullyQualifiedName, @@ -464,6 +465,15 @@ class ToolCatalog(BaseModel): tool_name = snake_to_pascal_case(raw_tool_name) fully_qualified_name = FullyQualifiedName.from_toolkit(tool_name, toolkit_definition) deprecation_message = getattr(tool, "__tool_deprecation_message__", None) + tool_metadata = getattr(tool, "__tool_metadata__", None) + + if tool_metadata is not None: + if not isinstance(tool_metadata, ToolMetadata): + raise ToolDefinitionError( + f"Expected a ToolMetadata instance for 'metadata', " + f"but got {type(tool_metadata).__name__}. " + ) + tool_metadata.validate_for_tool() return ToolDefinition( name=tool_name, @@ -478,6 +488,7 @@ class ToolCatalog(BaseModel): metadata=metadata_requirement, ), deprecation_message=deprecation_message, + metadata=tool_metadata, ) diff --git a/libs/arcade-core/arcade_core/metadata.py b/libs/arcade-core/arcade_core/metadata.py new file mode 100644 index 00000000..0390c2a6 --- /dev/null +++ b/libs/arcade-core/arcade_core/metadata.py @@ -0,0 +1,337 @@ +""" +Tool Metadata + +Defines the metadata model for Arcade tools. 
This module provides three layers: + +- Classification: What type of service the tool interfaces with (ServiceDomain). + Used for tool discovery and search boosting. + +- Behavior: What effects the tool has (operations, MCP-aligned flags). + MCP Annotations are computed from this. + Commonly used for policy decisions (HITL gates, retry logic, etc.) + +- Extras: Arbitrary key/values for custom logic (IDP routing, feature flags, etc.) +""" + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from arcade_core.errors import ToolDefinitionError + + +class ServiceDomain(str, Enum): + """ + The type of service a tool interfaces with. + + Classifies the target service whose data or functionality the tool provides + access to -- not the infrastructure used to access it. + + Assignment is based on how the service self-identifies and is broadly + recognized in its market. For tools that interact with no external service + (open_world=False), ServiceDomain is None. 
+ """ + + PROJECT_MANAGEMENT = "project_management" + """Project tracking, issue management, and work item software.""" + + CRM = "crm" + """Customer relationship management - contacts, deals, pipelines, sales activities.""" + + EMAIL = "email" + """Email services for sending, receiving, and managing messages.""" + + CALENDAR = "calendar" + """Calendar and scheduling services.""" + + MESSAGING = "messaging" + """Real-time team and business messaging platforms.""" + + DOCUMENTS = "documents" + """Document editing, wikis, and knowledge base platforms.""" + + CLOUD_STORAGE = "cloud_storage" + """Cloud file storage and sharing services.""" + + SPREADSHEETS = "spreadsheets" + """Spreadsheet and tabular data software.""" + + PRESENTATIONS = "presentations" + """Presentation and slideshow software.""" + + DESIGN = "design" + """UI/UX design and prototyping tools.""" + + SOURCE_CODE = "source_code" + """Source code management, version control, and code review.""" + + PAYMENTS = "payments" + """Payment processing, invoicing, and billing.""" + + SOCIAL_MEDIA = "social_media" + """Platforms where users publish content to a public audience through a social feed.""" + + VIDEO_HOSTING = "video_hosting" + """Video hosting, streaming, and distribution platforms.""" + + MUSIC_STREAMING = "music_streaming" + """Music streaming and playback platforms.""" + + CUSTOMER_SUPPORT = "customer_support" + """Help desk, ticketing, and customer service software.""" + + ECOMMERCE = "ecommerce" + """Online shopping, product catalogs, and retail platforms.""" + + INCIDENT_MANAGEMENT = "incident_management" + """Incident response, on-call management, and operational alerting.""" + + WEB_SCRAPING = "web_scraping" + """Web data extraction and crawling services.""" + + CODE_SANDBOX = "code_sandbox" + """Cloud code execution and sandboxed runtime environments.""" + + VIDEO_CONFERENCING = "video_conferencing" + """Video meeting and conferencing platforms.""" + + GEOSPATIAL = "geospatial" + """Maps, 
navigation, directions, and geocoding services.""" + + FINANCIAL_DATA = "financial_data" + """Financial market data and stock information services.""" + + TRAVEL = "travel" + """Travel search, flight and hotel booking platforms.""" + + +class Operation(str, Enum): + """ + Classifies the tool's effect on resources in the target system. + + The concrete values represent the four fundamental resource lifecycle + operations (read, create, update, delete). OPAQUE indicates the effect + cannot be determined from the tool's definition because it depends + on runtime inputs such as "ExecuteBashCommand(command="...")". + + Can be used for policy decisions (e.g., "require human approval for DELETE tools"). + """ + + READ = "read" + """ + Observes resources without changing state in the target system. + + When to use: Any operation that only returns information -- fetching records, + searching, listing resources, watching/subscribing to events, validating data, + dry-run previews. Tools with only READ should have read_only=True. + """ + + CREATE = "create" + """ + Brings a new resource or record into existence. + + When to use: Inserting new records, uploading files, provisioning resources, + scheduling jobs, posting messages, sending emails, instantiating new entities. + The resource did not exist before the operation. + """ + + UPDATE = "update" + """ + Modifies an existing resource's state, permissions, metadata, or content. + + When to use: Editing records, changing configuration, renaming, archiving/restoring, + patching, associating/disassociating resources (linking), changing lifecycle state + (start/stop/pause), sharing resources, modifying access permissions. + The resource identity persists after the operation. + """ + + DELETE = "delete" + """ + Removes a resource or record from the system. + + When to use: Permanent deletion, soft-delete where resource becomes inaccessible, + canceling queued jobs, unsubscribing, removing files. 
Use when the resource is + no longer retrievable through normal operations. Tools with DELETE should have + destructive=True. + """ + + OPAQUE = "opaque" + """ + Effect cannot be determined from the tool's definition because behavior + depends entirely on runtime inputs. + + When to use: Tools like Bash.ExecuteCommand(command="...") or E2b.RunCode(code="...") + where the actual operation is unknowable at definition time. OPAQUE signals to + policy engines that this tool's effects are indeterminate and should be treated + with caution. + """ + + +# Operation categories for validation +_READ_ONLY_OPERATIONS = {Operation.READ} +_MUTATING_OPERATIONS = {Operation.CREATE, Operation.UPDATE, Operation.DELETE} +_INDETERMINATE_OPERATIONS = {Operation.OPAQUE} + + +class Classification(BaseModel): + """ + What type of service does this tool interface with? + + Used for tool discovery and search boosting. + + Examples: + Classification(service_domains=[ServiceDomain.EMAIL]) + Classification(service_domains=[ServiceDomain.CLOUD_STORAGE, ServiceDomain.DOCUMENTS]) + """ + + service_domains: list[ServiceDomain] | None = None + """The service category/categories the tool's backing service belongs to. Multi-select.""" + + model_config = ConfigDict(extra="forbid") + + +class Behavior(BaseModel): + """ + What effects does the tool have? Arcade's data model for tool behavior. + + When using MCP, Behavior is projected to MCP annotations: + - read_only -> readOnlyHint + - destructive -> destructiveHint + - idempotent -> idempotentHint + - open_world -> openWorldHint + + Operations classify the tool's effect on resources and can be used for + policy decisions (e.g., "require human approval for DELETE tools"). 
+ + Example: + Behavior( + operations=[Operation.DELETE], + read_only=False, + destructive=True, # DELETE should be destructive + idempotent=True, # Deleting twice has same effect + open_world=True, # Interacts with external system + ) + """ + + operations: list[Operation] | None = None + """The tool's effect on resources in the target system. Multi-select for compound operations.""" + + read_only: bool | None = None + """Tool only reads data, no mutations. Maps to MCP readOnlyHint.""" + + destructive: bool | None = None + """Tool can cause irreversible data loss. Maps to MCP destructiveHint.""" + + idempotent: bool | None = None + """Repeated calls with same input have no additional effect. Maps to MCP idempotentHint.""" + + open_world: bool | None = None + """Tool interacts with external systems (not purely in-process). Maps to MCP openWorldHint.""" + + model_config = ConfigDict(extra="forbid") + + +class ToolMetadata(BaseModel): + """ + Container for metadata about a tool. + + - classification: What type of service does this tool interface with? (for discovery/boosting) + - behavior: What effects does it have? (for policy, filtering, MCP annotations) + - extras: Arbitrary key/values for custom logic (e.g., IDP routing, feature flags) + + Strict Mode Validation: + By default (strict=True), validate_for_tool() checks for logical contradictions: + - Mutating operations + read_only=True -> Error + - OPAQUE operation + read_only=True -> Error + - DELETE operation + destructive=False -> Error + - ServiceDomain present + open_world=False -> Error + + Set strict=False to bypass validation for valid edge cases (e.g., a "read" + tool that increments a view count as a side effect). 
+ + Example: + ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.EMAIL], + ), + behavior=Behavior( + operations=[Operation.CREATE], + read_only=False, + destructive=False, + idempotent=False, + open_world=True, + ), + extras={"idp": "entraID", "requires_mfa": True}, + ) + """ + + classification: Classification | None = None + """What type of service the tool interfaces with.""" + + behavior: Behavior | None = None + """What effects the tool has.""" + + extras: dict[str, Any] | None = None + """Arbitrary key/values for custom logic.""" + + strict: bool = Field(default=True, exclude=True) + """Enable validation for logical contradictions. Set False for edge cases. + Excluded from serialization - this is a validation-time config flag, not tool metadata.""" + + model_config = ConfigDict(extra="forbid") + + def validate_for_tool(self) -> None: + """ + Validate consistency between behavior and classification. + + Called by the catalog when creating a tool definition. + + Raises: + ToolDefinitionError: If strict=True and validation fails + """ + if not self.strict: + return + + behavior = self.behavior + classification = self.classification + + if behavior: + operations = set(behavior.operations or []) + + # Rule 1: Mutating operations + read_only=True is contradictory + mutating_ops = operations & _MUTATING_OPERATIONS + if mutating_ops and behavior.read_only is True: + raise ToolDefinitionError( + f"Tool has the mutating operation(s): " + f"'{', '.join([op.value.upper() for op in mutating_ops])}' " + f"in its behavior metadata, but is marked read_only=True. " + "Fix the contradiction, or set strict=False to bypass." + ) + + # Rule 2: OPAQUE + read_only=True is contradictory + if Operation.OPAQUE in operations and behavior.read_only is True: + raise ToolDefinitionError( + "Tool has OPAQUE operation but is marked read_only=True. " + "Cannot guarantee read-only when the operation is indeterminate. 
" + "Fix the contradiction, or set strict=False to bypass." + ) + + # Rule 3: DELETE should have destructive=True + if Operation.DELETE in operations and behavior.destructive is False: + raise ToolDefinitionError( + f"Tool has the '{Operation.DELETE.value.upper()}' operation " + "but is not marked destructive=True. " + "Fix the contradiction, or set strict=False to bypass." + ) + + if classification and behavior: + service_domains = classification.service_domains or [] + + # Rule 4: ServiceDomain present implies open_world=True + if len(service_domains) > 0 and behavior.open_world is False: + raise ToolDefinitionError( + "Tool has a ServiceDomain (implying an external service) " + "but is marked open_world=False. " + "Fix the contradiction, or set strict=False to bypass." + ) diff --git a/libs/arcade-core/arcade_core/schema.py b/libs/arcade-core/arcade_core/schema.py index 97c62658..62eacdc0 100644 --- a/libs/arcade-core/arcade_core/schema.py +++ b/libs/arcade-core/arcade_core/schema.py @@ -24,6 +24,7 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field from arcade_core.errors import ErrorKind +from arcade_core.metadata import ToolMetadata # allow for custom tool name separator TOOL_NAME_SEPARATOR = os.getenv("ARCADE_TOOL_NAME_SEPARATOR", ".") @@ -327,6 +328,9 @@ class ToolDefinition(BaseModel): deprecation_message: str | None = None """The message to display when the tool is deprecated.""" + metadata: ToolMetadata | None = None + """Metadata about the tool""" + def get_fully_qualified_name(self) -> FullyQualifiedName: return FullyQualifiedName(self.name, self.toolkit.name, self.toolkit.version) diff --git a/libs/arcade-core/pyproject.toml b/libs/arcade-core/pyproject.toml index acb34320..16f640ff 100644 --- a/libs/arcade-core/pyproject.toml +++ b/libs/arcade-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-core" -version = "4.3.0" +version = "4.4.0" description = "Arcade Core - Core library for Arcade platform" readme = 
"README.md" license = { text = "MIT" } diff --git a/libs/arcade-mcp-server/arcade_mcp_server/convert.py b/libs/arcade-mcp-server/arcade_mcp_server/convert.py index 8f4ae426..c0801aa0 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/convert.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/convert.py @@ -1,8 +1,7 @@ import base64 import json import logging -from enum import Enum -from typing import Any, get_args, get_origin +from typing import Any from arcade_core.catalog import MaterializedTool from arcade_core.schema import ToolDefinition @@ -12,95 +11,83 @@ from arcade_mcp_server.types import MCPContent, MCPTool, TextContent, ToolAnnota logger = logging.getLogger("arcade.mcp") -def create_mcp_tool(tool: MaterializedTool) -> MCPTool | None: +def _build_arcade_meta(definition: ToolDefinition) -> dict[str, Any] | None: + """Build the _meta.arcade structure from a tool definition. + + The structure of _meta.arcade mirrors Arcade format when possible. """ - Create an MCP-compatible tool definition from an Arcade tool. + arcade_meta: dict[str, Any] = {} + + requirements = definition.requirements + if requirements.authorization or requirements.secrets or requirements.metadata: + arcade_meta["requirements"] = requirements.model_dump(exclude_none=True) + + tool_metadata = definition.metadata + if tool_metadata: + metadata_dump = tool_metadata.model_dump(mode="json", exclude_none=True) + if metadata_dump: + arcade_meta["metadata"] = metadata_dump + + return arcade_meta if arcade_meta else None + + +def create_mcp_tool(materialized_tool: MaterializedTool) -> MCPTool: + """ + Create an MCP-compatible tool definition from a MaterializedTool. + + Computes MCP annotations from tool metadata behavior fields and builds + the ``_meta.arcade`` structure with requirements and metadata. 
Args: - tool: An Arcade tool object + materialized_tool: A materialized Arcade tool Returns: - An MCP tool definition or None if the tool cannot be converted + An MCP tool definition """ - try: - # Get the tool name from the definition - tool_name = getattr(tool.definition, "name", "unknown") - fully_qualified_name = getattr(tool.definition, "fully_qualified_name", None) + definition = materialized_tool.definition + name = definition.fully_qualified_name.replace(".", "_") - # Use fully qualified name for MCP tool name (replacing dots with underscores) - name = fully_qualified_name.replace(".", "_") if fully_qualified_name else tool_name + # Build the tool's description + description = definition.description + deprecation_msg = getattr(definition, "deprecation_message", None) + if deprecation_msg: + description = f"[DEPRECATED: {deprecation_msg}] {description}" - description = getattr(tool.definition, "description", "No description available") + # Build the tool's output schema + output_schema = None + if hasattr(definition, "output") and definition.output: + output_def = definition.output + if getattr(output_def, "value_schema", None): + output_schema = _build_value_schema_json(output_def.value_schema) - # Check for deprecation - deprecation_msg = getattr(tool.definition, "deprecation_message", None) - if deprecation_msg: - description = f"[DEPRECATED: {deprecation_msg}] {description}" - - # Build input schema using authoritative ToolDefinition when available - try: - if getattr(tool.definition, "input", None): - input_schema = build_input_schema_from_definition(tool.definition) - else: - # Fallback to input_model if definition input is missing - input_schema = _build_input_schema_from_model(tool) - except Exception: - logger.exception("Error while constructing input schema; proceeding with empty schema") - input_schema = {"type": "object", "properties": {}, "additionalProperties": False} - - # Create output schema if available - output_schema = None - try: - if 
hasattr(tool.definition, "output") and tool.definition.output: - output_def = tool.definition.output - if getattr(output_def, "value_schema", None): - output_schema = _build_value_schema_json(output_def.value_schema) - except Exception: - logger.exception("Error while constructing output schema; omitting output schema") - - requirements = tool.definition.requirements - - # Build annotations using model for stricter typing + # Build MCP tool annotations from metadata behavior fields + title = getattr(materialized_tool.tool, "__tool_name__", definition.name) + tool_metadata = definition.metadata + if tool_metadata and tool_metadata.behavior: + behavior = tool_metadata.behavior annotations = ToolAnnotations( - readOnlyHint=not ( - requirements.authorization or requirements.secrets or requirements.metadata - ), - openWorldHint=requirements.authorization is not None, + title=title, + readOnlyHint=behavior.read_only, + destructiveHint=behavior.destructive, + idempotentHint=behavior.idempotent, + openWorldHint=behavior.open_world, ) + else: + annotations = ToolAnnotations(title=title) - # Build meta with requirements if any exist - meta = None - if requirements.authorization or requirements.secrets or requirements.metadata: - meta = {"arcade_requirements": requirements.model_dump(exclude_none=True)} + # Build _meta.arcade structure + arcade_meta = _build_arcade_meta(definition) + meta = {"arcade": arcade_meta} if arcade_meta else None - # Instantiate MCPTool model to ensure shape correctness - return MCPTool( - name=name, - title=tool.definition.toolkit.name + "_" + tool_name, - description=str(description), - inputSchema=input_schema, - outputSchema=output_schema if output_schema else None, - annotations=annotations, - _meta=meta, - ) - - except Exception: - logger.exception( - f"Error creating MCP tool definition for {getattr(tool, 'name', str(tool))}" - ) - try: - # Fallback minimal tool to avoid None in callers - fallback_name = getattr(tool.definition, 
"fully_qualified_name", "unknown").replace( - ".", "_" - ) - return MCPTool( - name=fallback_name, - title=fallback_name, - description="", - inputSchema={"type": "object", "properties": {}, "additionalProperties": False}, - ) - except Exception: - return None + return MCPTool( + name=name, + title=title, + description=str(description), + inputSchema=build_input_schema_from_definition(definition), + outputSchema=output_schema if output_schema else None, + annotations=annotations, + _meta=meta, + ) def convert_to_mcp_content(value: Any) -> list[MCPContent]: @@ -241,116 +228,6 @@ def build_input_schema_from_definition(definition: ToolDefinition) -> dict[str, return input_schema -def _build_input_schema_from_model(tool: MaterializedTool) -> dict[str, Any]: - """Build input schema from a tool's input_model as a fallback.""" - properties: dict[str, Any] = {} - required: list[str] = [] - - context_param_name = None - tool_input = getattr(tool.definition, "input", None) - if tool_input is not None: - context_param_name = getattr(tool_input, "tool_context_parameter_name", None) - - if ( - hasattr(tool, "input_model") - and tool.input_model is not None - and hasattr(tool.input_model, "model_fields") - ): - for field_name, field in tool.input_model.model_fields.items(): - if field_name == context_param_name: - continue - - field_type = getattr(field, "annotation", None) - field_type_name = "string" # default - - if field_type is int: - field_type_name = "integer" - elif field_type is float: - field_type_name = "number" - elif field_type is bool: - field_type_name = "boolean" - elif field_type is list or (getattr(field_type, "__origin__", None) is list): - field_type_name = "array" - elif field_type is dict or (getattr(field_type, "__origin__", None) is dict): - field_type_name = "object" - - field_description = getattr(field, "description", None) or f"Parameter: {field_name}" - - param_def: dict[str, Any] = { - "type": field_type_name, - "description": field_description, - } 
- - # Enum support: Enum classes or typing.Annotated[...] with Enum - enum_type = None - ann = getattr(field, "annotation", None) - if ann is not None: - origin = get_origin(ann) - args = get_args(ann) - # typing.Annotated[Enum, ...] - if origin is not None and args: - for arg in args: - if isinstance(arg, type) and issubclass(arg, Enum): - enum_type = arg - break - elif isinstance(ann, type) and issubclass(ann, Enum): - enum_type = ann - if enum_type is not None: - param_def["enum"] = [e.value for e in enum_type] - - # Literal[...] support for enum-like constraints - if ann is not None and get_origin(ann) is None: - pass # no-op, handled above - elif ann is not None and get_origin(ann) is Any: - pass - else: - if get_origin(ann) is None: - ... - - # Attempt to infer inner list item types for list[T] - if field_type_name == "array": - inner = None - if get_origin(field_type) is list and get_args(field_type): - inner = get_args(field_type)[0] - if inner is int: - param_def["items"] = {"type": "integer"} - elif inner is float: - param_def["items"] = {"type": "number"} - elif inner is bool: - param_def["items"] = {"type": "boolean"} - elif inner is str: - param_def["items"] = {"type": "string"} - - properties[field_name] = param_def - - # Required detection with multiple strategies - is_required_attr = getattr(field, "is_required", None) - try: - if callable(is_required_attr): - if is_required_attr(): - required.append(field_name) - elif isinstance(is_required_attr, bool) and is_required_attr: - required.append(field_name) - else: - has_default = getattr(field, "default", None) is not None - has_factory = getattr(field, "default_factory", None) is not None - if not (has_default or has_factory): - required.append(field_name) - except Exception: - logger.debug( - f"Could not determine if field {field_name} is required, assuming optional" - ) - - input_schema: dict[str, Any] = { - "type": "object", - "properties": properties, - "additionalProperties": False, - } - if 
required: - input_schema["required"] = required - return input_schema - - def _build_value_schema_json(value_schema: Any) -> dict[str, Any]: """Map a ValueSchema to a JSON schema fragment for outputSchema.""" schema: dict[str, Any] = { diff --git a/libs/arcade-mcp-server/arcade_mcp_server/managers/tool.py b/libs/arcade-mcp-server/arcade_mcp_server/managers/tool.py index 9cecf26c..2a100502 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/managers/tool.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/managers/tool.py @@ -10,7 +10,7 @@ from typing import TypedDict from arcade_core.catalog import MaterializedTool, ToolCatalog -from arcade_mcp_server.convert import build_input_schema_from_definition +from arcade_mcp_server.convert import create_mcp_tool from arcade_mcp_server.exceptions import NotFoundError from arcade_mcp_server.managers.base import ComponentManager from arcade_mcp_server.types import MCPTool @@ -35,20 +35,13 @@ class ToolManager(ComponentManager[Key, ManagedTool]): def _sanitize_name(name: str) -> str: return name.replace(".", "_") - def _to_dto(self, tool: MaterializedTool) -> MCPTool: - # Extract requirements and build meta if needed - requirements = tool.definition.requirements - meta = None - if requirements.authorization or requirements.secrets or requirements.metadata: - meta = {"arcade_requirements": requirements.model_dump(exclude_none=True)} + @staticmethod + def _to_dto(materialized_tool: MaterializedTool) -> MCPTool: + """Convert a MaterializedTool to an MCPTool DTO. - return MCPTool( - name=self._sanitize_name(tool.definition.fully_qualified_name), - title=f"{tool.definition.toolkit.name}_{tool.definition.name}", - description=tool.definition.description, - inputSchema=build_input_schema_from_definition(tool.definition), - _meta=meta, - ) + Delegates to :func:`arcade_mcp_server.convert.create_mcp_tool`. 
+ """ + return create_mcp_tool(materialized_tool) async def load_from_catalog(self, catalog: ToolCatalog) -> None: pairs: list[tuple[Key, ManagedTool]] = [] diff --git a/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py b/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py index 91c59471..36b07e0f 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py @@ -16,6 +16,7 @@ from types import ModuleType from typing import Any, Callable, Literal, ParamSpec, TypeVar, cast from arcade_core.catalog import MaterializedTool, ToolCatalog, ToolDefinitionError +from arcade_core.metadata import ToolMetadata from arcade_tdk.auth import ToolAuthorization from arcade_tdk.error_adapters import ErrorAdapter from arcade_tdk.tool import tool as tool_decorator @@ -225,6 +226,7 @@ class MCPApp: requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, adapters: list[ErrorAdapter] | None = None, + metadata: ToolMetadata | None = None, ) -> Callable[P, T]: """Add a tool for build-time materialization (pre-server).""" if not hasattr(func, "__tool_name__"): @@ -236,6 +238,7 @@ class MCPApp: requires_secrets=requires_secrets, requires_metadata=requires_metadata, adapters=adapters, + metadata=metadata, ) try: self._catalog.add_tool( @@ -264,6 +267,7 @@ class MCPApp: requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, adapters: list[ErrorAdapter] | None = None, + metadata: ToolMetadata | None = None, ) -> Callable[[Callable[P, T]], Callable[P, T]] | Callable[P, T]: """Decorator for adding tools with optional parameters.""" @@ -276,6 +280,7 @@ class MCPApp: requires_secrets=requires_secrets, requires_metadata=requires_metadata, adapters=adapters, + metadata=metadata, ) if func is not None: diff --git a/libs/arcade-mcp-server/arcade_mcp_server/metadata.py b/libs/arcade-mcp-server/arcade_mcp_server/metadata.py new file mode 100644 index 00000000..7f9c021e --- 
/dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/metadata.py @@ -0,0 +1,15 @@ +from arcade_core.metadata import ( + Behavior, + Classification, + Operation, + ServiceDomain, + ToolMetadata, +) + +__all__ = [ + "Behavior", + "Classification", + "Operation", + "ServiceDomain", + "ToolMetadata", +] diff --git a/libs/arcade-mcp-server/pyproject.toml b/libs/arcade-mcp-server/pyproject.toml index eb8bdd1a..2d663a27 100644 --- a/libs/arcade-mcp-server/pyproject.toml +++ b/libs/arcade-mcp-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "arcade-mcp-server" -version = "1.16.0" +version = "1.17.0" description = "Model Context Protocol (MCP) server framework for Arcade.dev" readme = "README.md" authors = [{ name = "Arcade.dev" }] @@ -21,9 +21,9 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "arcade-core>=4.3.0,<5.0.0", + "arcade-core>=4.4.0,<5.0.0", "arcade-serve>=3.2.0,<4.0.0", - "arcade-tdk>=3.4.0,<4.0.0", + "arcade-tdk>=3.6.0,<4.0.0", "arcadepy>=1.5.0", "pydantic>=2.0.0", "fastapi>=0.100.0", diff --git a/libs/arcade-tdk/arcade_tdk/tool.py b/libs/arcade-tdk/arcade_tdk/tool.py index efa4b89d..09f599e8 100644 --- a/libs/arcade-tdk/arcade_tdk/tool.py +++ b/libs/arcade-tdk/arcade_tdk/tool.py @@ -3,6 +3,8 @@ import inspect import logging from typing import Any, Callable, TypeVar +from arcade_core.metadata import ToolMetadata + from arcade_tdk.auth import ToolAuthorization from arcade_tdk.error_adapters import ErrorAdapter from arcade_tdk.error_adapters.utils import get_adapter_for_auth_provider @@ -112,6 +114,7 @@ def tool( requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, adapters: list[ErrorAdapter] | None = None, + metadata: ToolMetadata | None = None, ) -> Callable: def decorator(func: Callable) -> Callable: func_name = str(getattr(func, "__name__", None)) @@ -122,6 +125,7 @@ def tool( func.__tool_requires_auth__ = requires_auth # type: ignore[attr-defined] 
func.__tool_requires_secrets__ = requires_secrets # type: ignore[attr-defined] func.__tool_requires_metadata__ = requires_metadata # type: ignore[attr-defined] + func.__tool_metadata__ = metadata # type: ignore[attr-defined] adapter_chain = _build_adapter_chain(adapters, requires_auth) diff --git a/libs/arcade-tdk/pyproject.toml b/libs/arcade-tdk/pyproject.toml index 16eb7299..e0dcc61a 100644 --- a/libs/arcade-tdk/pyproject.toml +++ b/libs/arcade-tdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-tdk" -version = "3.5.0" +version = "3.6.0" description = "Arcade TDK - Toolkit Development Kit for building Arcade tools" readme = "README.md" license = { text = "MIT" } @@ -16,7 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] requires-python = ">=3.10" -dependencies = ["arcade-core>=4.3.0,<5.0.0", "pydantic>=2.7.0"] +dependencies = ["arcade-core>=4.4.0,<5.0.0", "pydantic>=2.7.0"] [project.optional-dependencies] dev = [ diff --git a/libs/tests/arcade_mcp_server/test_convert.py b/libs/tests/arcade_mcp_server/test_convert.py index 6e39865f..3305c3d3 100644 --- a/libs/tests/arcade_mcp_server/test_convert.py +++ b/libs/tests/arcade_mcp_server/test_convert.py @@ -371,50 +371,10 @@ class TestCreateMCPTool: assert schema["properties"] == {} assert schema.get("required", []) in ([], None) - def test_missing_input_attribute_fallback(self): - """Test tool with missing input attribute to trigger _build_input_schema_from_model fallback.""" - # Create a valid ToolDefinition first - tool_def = ToolDefinition( - name="test_fallback", - fully_qualified_name="Test.test_fallback", - description="Test fallback to input model", - toolkit=ToolkitDefinition(name="Test"), - input=ToolInput(parameters=[]), - output=ToolOutput(), - requirements=ToolRequirements(), - ) + def test_output_schema_included(self, materialized_tool): + """Test that output schema is included when definition has one.""" + mcp_tool = create_mcp_tool(materialized_tool) - @tool - def f( - name: 
Annotated[str, "User name"], age: Annotated[int, "User age"] = 25 - ) -> Annotated[str, "greeting"]: - return f"Hello {name}, you are {age} years old" - - input_model, output_model = create_func_models(f) - meta = ToolMeta(module=f.__module__, toolkit=tool_def.toolkit.name) - mat_tool = MaterializedTool( - tool=f, - definition=tool_def, - meta=meta, - input_model=input_model, - output_model=output_model, - ) - - # Remove the input attribute from the definition to simulate the missing attribute case - delattr(mat_tool.definition, "input") - - mcp_tool = create_mcp_tool(mat_tool) - schema = mcp_tool.inputSchema - - assert schema["type"] == "object" - assert "properties" in schema - assert "name" in schema["properties"] - assert "age" in schema["properties"] - - # Ensure the schema was built from the model and not the definition - assert schema["properties"]["name"]["type"] == "string" - assert schema["properties"]["age"]["type"] == "integer" - - if "required" in schema: - assert "name" in schema["required"] - assert "age" not in schema["required"] + # The fixture's output has value_schema=ValueSchema(val_type="number") + assert mcp_tool.outputSchema is not None + assert mcp_tool.outputSchema["type"] == "number" diff --git a/libs/tests/arcade_mcp_server/test_tool_metadata_serialization.py b/libs/tests/arcade_mcp_server/test_tool_metadata_serialization.py new file mode 100644 index 00000000..abc70cbd --- /dev/null +++ b/libs/tests/arcade_mcp_server/test_tool_metadata_serialization.py @@ -0,0 +1,372 @@ +"""Tests for tool metadata serialization to MCP format.""" + +import pytest +from arcade_core.catalog import MaterializedTool, ToolCatalog, ToolMeta, create_func_models +from arcade_core.metadata import ( + Behavior, + Classification, + Operation, + ServiceDomain, + ToolMetadata, +) +from arcade_mcp_server.managers.tool import ToolManager +from arcade_tdk import tool +from arcade_tdk.auth import OAuth2 + + +class TestToolMetadataSerialization: + """Test serialization of 
ToolMetadata to MCP format.""" + + @pytest.fixture + def tool_manager(self) -> ToolManager: + return ToolManager() + + def _create_materialized_tool(self, tool_func) -> MaterializedTool: + """Helper to create a MaterializedTool from a decorated function.""" + definition = ToolCatalog.create_tool_definition( + tool_func, toolkit_name="Test", toolkit_version="1.0.0" + ) + input_model, output_model = create_func_models(tool_func) + return MaterializedTool( + tool=tool_func, + definition=definition, + meta=ToolMeta(module="test"), + input_model=input_model, + output_model=output_model, + ) + + def test_annotations_computed_from_behavior(self, tool_manager: ToolManager): + """Annotations should be computed from behavior fields.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + behavior=Behavior( + operations=[Operation.CREATE], + read_only=False, + destructive=False, + idempotent=True, + open_world=True, + ), + ), + ) + def create_item() -> str: + """Create an item.""" + return "created" + + materialized = self._create_materialized_tool(create_item) + dto = tool_manager._to_dto(materialized) + + assert dto.annotations is not None + assert dto.annotations.title == "CreateItem" + assert dto.annotations.readOnlyHint is False + assert dto.annotations.destructiveHint is False + assert dto.annotations.idempotentHint is True + assert dto.annotations.openWorldHint is True + + def test_meta_arcade_includes_classification(self, tool_manager: ToolManager): + """_meta.arcade.metadata should include classification with service_domains.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.MESSAGING, ServiceDomain.DOCUMENTS], + ), + behavior=Behavior( + operations=[Operation.CREATE], + open_world=True, + ), + ), + ) + def forward_message() -> str: + """Forward a message.""" + return "forwarded" + + materialized = self._create_materialized_tool(forward_message) + dto = tool_manager._to_dto(materialized) 
+ + assert dto.meta is not None + assert "arcade" in dto.meta + assert "metadata" in dto.meta["arcade"] + assert "classification" in dto.meta["arcade"]["metadata"] + assert dto.meta["arcade"]["metadata"]["classification"]["service_domains"] == [ + "messaging", + "documents", + ] + + def test_meta_arcade_includes_operations(self, tool_manager: ToolManager): + """_meta.arcade.metadata.behavior should include operations as lowercase strings.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + behavior=Behavior(operations=[Operation.CREATE, Operation.UPDATE]), + ), + ) + def upsert_record() -> str: + """Upsert a record.""" + return "upserted" + + materialized = self._create_materialized_tool(upsert_record) + dto = tool_manager._to_dto(materialized) + + assert dto.meta is not None + assert "arcade" in dto.meta + assert "metadata" in dto.meta["arcade"] + assert "behavior" in dto.meta["arcade"]["metadata"] + assert dto.meta["arcade"]["metadata"]["behavior"]["operations"] == ["create", "update"] + + def test_meta_arcade_includes_extras(self, tool_manager: ToolManager): + """_meta.arcade.metadata should include extras dict unchanged.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + extras={"idp": "entraID", "requires_mfa": True, "max_requests": 100}, + ), + ) + def secure_action() -> str: + """Perform secure action.""" + return "done" + + materialized = self._create_materialized_tool(secure_action) + dto = tool_manager._to_dto(materialized) + + assert dto.meta is not None + assert "arcade" in dto.meta + assert "metadata" in dto.meta["arcade"] + assert "extras" in dto.meta["arcade"]["metadata"] + assert dto.meta["arcade"]["metadata"]["extras"] == { + "idp": "entraID", + "requires_mfa": True, + "max_requests": 100, + } + + def test_tool_without_metadata_still_works(self, tool_manager: ToolManager): + """Tools without metadata should still serialize correctly with title.""" + + @tool(desc="Test tool") + def simple_tool() -> str: + """Simple tool.""" + 
return "simple" + + materialized = self._create_materialized_tool(simple_tool) + dto = tool_manager._to_dto(materialized) + + # Should have title in annotations even without behavior + assert dto.annotations is not None + assert dto.annotations.title == "SimpleTool" + # Hint fields should be None + assert dto.annotations.readOnlyHint is None + assert dto.annotations.destructiveHint is None + assert dto.annotations.idempotentHint is None + assert dto.annotations.openWorldHint is None + # Should not have arcade meta without metadata + assert dto.meta is None or "arcade" not in dto.meta + + def test_full_metadata_serialization(self, tool_manager: ToolManager): + """Test complete metadata serialization with all fields.""" + + @tool( + desc="Send an email using the Gmail API", + metadata=ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.EMAIL], + ), + behavior=Behavior( + operations=[Operation.CREATE], + read_only=False, + destructive=False, + idempotent=False, + open_world=True, + ), + extras={"idp": "entraID", "requires_mfa": True}, + ), + ) + def send_email() -> str: + """Send an email.""" + return "sent" + + materialized = self._create_materialized_tool(send_email) + dto = tool_manager._to_dto(materialized) + + # Verify annotations + assert dto.annotations is not None + assert dto.annotations.title == "SendEmail" + assert dto.annotations.readOnlyHint is False + assert dto.annotations.destructiveHint is False + assert dto.annotations.idempotentHint is False + assert dto.annotations.openWorldHint is True + + # Verify _meta.arcade structure (mirrors Arcade format) + assert dto.meta is not None + assert "arcade" in dto.meta + arcade = dto.meta["arcade"] + + assert "metadata" in arcade + metadata = arcade["metadata"] + + assert metadata["classification"]["service_domains"] == ["email"] + assert metadata["behavior"]["operations"] == ["create"] + assert metadata["behavior"]["read_only"] is False + assert metadata["behavior"]["destructive"] is 
False + assert metadata["behavior"]["idempotent"] is False + assert metadata["behavior"]["open_world"] is True + assert metadata["extras"] == {"idp": "entraID", "requires_mfa": True} + + def test_metadata_with_only_classification(self, tool_manager: ToolManager): + """Tools with only classification should serialize correctly.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.WEB_SCRAPING], + ), + ), + ) + def search_web() -> str: + """Search the web.""" + return "results" + + materialized = self._create_materialized_tool(search_web) + dto = tool_manager._to_dto(materialized) + + # Annotations should still have title + assert dto.annotations is not None + assert dto.annotations.title == "SearchWeb" + # Hint fields should be None without behavior + assert dto.annotations.readOnlyHint is None + + # _meta.arcade.metadata should have classification but not behavior + assert dto.meta is not None + assert "arcade" in dto.meta + assert "metadata" in dto.meta["arcade"] + assert "classification" in dto.meta["arcade"]["metadata"] + assert "behavior" not in dto.meta["arcade"]["metadata"] + + def test_metadata_with_only_extras(self, tool_manager: ToolManager): + """Tools with only extras should serialize correctly.""" + + @tool( + desc="Test tool", + metadata=ToolMetadata( + extras={"custom_key": "custom_value"}, + ), + ) + def custom_tool() -> str: + """Custom tool.""" + return "custom" + + materialized = self._create_materialized_tool(custom_tool) + dto = tool_manager._to_dto(materialized) + + # _meta.arcade.metadata should have only extras + assert dto.meta is not None + assert "arcade" in dto.meta + assert "metadata" in dto.meta["arcade"] + assert "classification" not in dto.meta["arcade"]["metadata"] + assert "behavior" not in dto.meta["arcade"]["metadata"] + assert dto.meta["arcade"]["metadata"]["extras"] == {"custom_key": "custom_value"} + + def test_meta_arcade_includes_requirements(self, 
tool_manager: ToolManager): + """_meta.arcade should include requirements when tool has auth.""" + + @tool( + desc="Tool requiring OAuth", + requires_auth=OAuth2( + id="google", + scopes=["https://www.googleapis.com/auth/gmail.send"], + ), + ) + def authenticated_tool() -> str: + """Tool requiring authentication.""" + return "authenticated" + + materialized = self._create_materialized_tool(authenticated_tool) + dto = tool_manager._to_dto(materialized) + + # _meta.arcade should have requirements + assert dto.meta is not None + assert "arcade" in dto.meta + assert "requirements" in dto.meta["arcade"] + assert "authorization" in dto.meta["arcade"]["requirements"] + assert dto.meta["arcade"]["requirements"]["authorization"]["id"] == "google" + + def test_meta_arcade_includes_secrets_requirements(self, tool_manager: ToolManager): + """_meta.arcade should include requirements when tool has secrets.""" + + @tool( + desc="Tool requiring secrets", + requires_secrets=["API_KEY", "API_SECRET"], + ) + def secret_tool() -> str: + """Tool requiring secrets.""" + return "secret" + + materialized = self._create_materialized_tool(secret_tool) + dto = tool_manager._to_dto(materialized) + + # _meta.arcade should have requirements + assert dto.meta is not None + assert "arcade" in dto.meta + assert "requirements" in dto.meta["arcade"] + assert "secrets" in dto.meta["arcade"]["requirements"] + secrets_req = dto.meta["arcade"]["requirements"]["secrets"] + assert "API_KEY" in [s["key"] for s in secrets_req] + assert "API_SECRET" in [s["key"] for s in secrets_req] + + def test_full_metadata_with_requirements(self, tool_manager: ToolManager): + """Test complete serialization with both metadata and requirements.""" + + @tool( + desc="Full featured tool", + requires_auth=OAuth2( + id="google", + scopes=["https://www.googleapis.com/auth/gmail.send"], + ), + metadata=ToolMetadata( + classification=Classification( + service_domains=[ServiceDomain.EMAIL], + ), + behavior=Behavior( + 
operations=[Operation.CREATE], + read_only=False, + destructive=False, + open_world=True, + ), + extras={"idp": "google"}, + ), + ) + def full_tool() -> str: + """Full featured tool.""" + return "full" + + materialized = self._create_materialized_tool(full_tool) + dto = tool_manager._to_dto(materialized) + + # Verify structure: requirements at top level, metadata container for rest + assert dto.meta is not None + assert "arcade" in dto.meta + arcade = dto.meta["arcade"] + + # Requirements at top level of arcade + assert "requirements" in arcade + assert arcade["requirements"]["authorization"]["id"] == "google" + + # metadata container holds classification, behavior, extras + assert "metadata" in arcade + metadata = arcade["metadata"] + + assert "classification" in metadata + assert "behavior" in metadata + assert "extras" in metadata + + # Verify specific values + assert metadata["classification"]["service_domains"] == ["email"] + assert metadata["behavior"]["operations"] == ["create"] + assert metadata["behavior"]["read_only"] is False + assert metadata["behavior"]["destructive"] is False + assert metadata["behavior"]["open_world"] is True + assert metadata["extras"] == {"idp": "google"} diff --git a/libs/tests/tool/test_tool_metadata.py b/libs/tests/tool/test_tool_metadata.py new file mode 100644 index 00000000..c8193480 --- /dev/null +++ b/libs/tests/tool/test_tool_metadata.py @@ -0,0 +1,248 @@ +import pytest +from arcade_core.catalog import ToolCatalog +from arcade_core.errors import ToolDefinitionError +from arcade_core.metadata import ( + _INDETERMINATE_OPERATIONS, + _MUTATING_OPERATIONS, + _READ_ONLY_OPERATIONS, + Behavior, + Classification, + Operation, + ServiceDomain, + ToolMetadata, +) +from arcade_tdk import tool + + +class TestEnumCoverage: + """ + Tests to ensure all enum values are accounted for in validation helper sets. 
+ + These tests will fail if new enum values are added without updating the + corresponding helper sets, ensuring future maintainers don't forget to + categorize new values. + """ + + def test_all_operations_are_categorized(self): + """Every Operation must be in _READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, or _INDETERMINATE_OPERATIONS.""" + all_operations = set(Operation) + categorized_operations = _READ_ONLY_OPERATIONS | _MUTATING_OPERATIONS | _INDETERMINATE_OPERATIONS + + # Check that every operation is categorized + uncategorized = all_operations - categorized_operations + assert not uncategorized, ( + f"The following Operation values are not categorized in _READ_ONLY_OPERATIONS, " + f"_MUTATING_OPERATIONS, or _INDETERMINATE_OPERATIONS: {uncategorized}. " + f"Please add them to the appropriate set in arcade_core/metadata.py" + ) + + # Check that there are no extra operations in the sets that don't exist in the enum + extra = categorized_operations - all_operations + assert not extra, ( + f"The following values are in _READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, or " + f"_INDETERMINATE_OPERATIONS but don't exist in the Operation enum: {extra}" + ) + + def test_operation_categories_are_disjoint(self): + """_READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, and _INDETERMINATE_OPERATIONS should not overlap.""" + ro_mut = _READ_ONLY_OPERATIONS & _MUTATING_OPERATIONS + assert not ro_mut, ( + f"The following Operation values appear in both _READ_ONLY_OPERATIONS and " + f"_MUTATING_OPERATIONS: {ro_mut}. An operation should be in exactly one category." + ) + + ro_ind = _READ_ONLY_OPERATIONS & _INDETERMINATE_OPERATIONS + assert not ro_ind, ( + f"The following Operation values appear in both _READ_ONLY_OPERATIONS and " + f"_INDETERMINATE_OPERATIONS: {ro_ind}. An operation should be in exactly one category." 
+ ) + + mut_ind = _MUTATING_OPERATIONS & _INDETERMINATE_OPERATIONS + assert not mut_ind, ( + f"The following Operation values appear in both _MUTATING_OPERATIONS and " + f"_INDETERMINATE_OPERATIONS: {mut_ind}. An operation should be in exactly one category." + ) + +
+class TestToolMetadataValidation:
+    """Test strict mode validation rules for ToolMetadata."""
+
+    def test_valid_metadata_passes(self):
+        """Valid metadata with consistent values should not raise."""
+        metadata = ToolMetadata(
+            classification=Classification(
+                service_domains=[ServiceDomain.EMAIL],
+            ),
+            behavior=Behavior(
+                operations=[Operation.CREATE],
+                read_only=False,
+                destructive=False,
+                open_world=True,
+            ),
+        )
+        metadata.validate_for_tool()  # Rules are enforced here, not at construction
+
+    def test_mutating_operation_with_read_only_raises(self):
+        """Mutating operations with read_only=True should raise when validated."""
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.CREATE], read_only=True),
+        )
+        with pytest.raises(
+            ToolDefinitionError, match="mutating operation.*but is marked read_only=True"
+        ):
+            metadata.validate_for_tool()
+
+    def test_opaque_with_read_only_raises(self):
+        """OPAQUE operation with read_only=True should raise when validated."""
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.OPAQUE], read_only=True),
+        )
+        with pytest.raises(
+            ToolDefinitionError, match="OPAQUE operation but is marked read_only=True"
+        ):
+            metadata.validate_for_tool()
+
+    def test_delete_without_destructive_raises(self):
+        """DELETE operation without destructive=True should raise when validated."""
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.DELETE], destructive=False),
+        )
+        with pytest.raises(
+            ToolDefinitionError, match="'DELETE' operation.*but is not marked destructive=True"
+        ):
+            metadata.validate_for_tool()
+
+    def test_service_domain_with_open_world_false_raises(self):
+        """ServiceDomain present with open_world=False should raise when validated."""
+        metadata = ToolMetadata(
+            classification=Classification(service_domains=[ServiceDomain.EMAIL]),
+            behavior=Behavior(open_world=False),
+        )
+        with pytest.raises(
+            ToolDefinitionError, match="ServiceDomain.*but is marked open_world=False"
+        ):
+            metadata.validate_for_tool()
+
+    def test_strict_false_bypasses_validation(self):
+        """Setting strict=False should bypass all validation rules."""
+        # This would normally raise due to contradiction
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.CREATE], read_only=True),
+            strict=False,
+        )
+        # No error should be raised when validate_for_tool is called
+        metadata.validate_for_tool()  # Should not raise
+        assert metadata is not None
+
+    def test_error_message_includes_operation_name(self):
+        """Error messages should name the offending operation (matched case-insensitively)."""
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.CREATE], read_only=True),
+        )
+        with pytest.raises(ToolDefinitionError, match="(?i)mutating operation.*create"):
+            metadata.validate_for_tool()
+
+    def test_read_only_operation_with_read_only_true_passes(self):
+        """READ operation with read_only=True should pass validation."""
+        metadata = ToolMetadata(
+            behavior=Behavior(operations=[Operation.READ], read_only=True),
+        )
+        metadata.validate_for_tool()  # Rules are enforced here, not at construction
+        assert metadata.behavior.read_only is True
+
+    def test_multiple_service_domains_allowed(self):
+        """Tools can have multiple service domains."""
+        metadata = ToolMetadata(
+            classification=Classification(
+                service_domains=[ServiceDomain.CLOUD_STORAGE, ServiceDomain.DOCUMENTS],
+            ),
+            behavior=Behavior(operations=[Operation.READ], read_only=True, open_world=True),
+        )
+        assert len(metadata.classification.service_domains) == 2
+
+    def test_extras_accepts_arbitrary_dict(self):
+        """Extras field accepts arbitrary key/value pairs."""
+        metadata = ToolMetadata(
+            extras={"idp": "entraID", "requires_mfa": True, "max_requests": 100},
+        )
+        assert metadata.extras["idp"] == "entraID"
+        assert metadata.extras["requires_mfa"] is True
+        assert metadata.extras["max_requests"] == 100
+
+
+class TestToolDecoratorWithMetadata:
+    """Test @tool decorator with metadata parameter."""
+
+    def test_decorator_accepts_metadata(self):
+        """Decorator should store metadata as __tool_metadata__ attribute."""
+
+        @tool(
+            desc="Test tool",
+            metadata=ToolMetadata(
+                classification=Classification(service_domains=[ServiceDomain.MESSAGING]),
+                behavior=Behavior(operations=[Operation.CREATE], open_world=True),
+            ),
+        )
+        def my_tool() -> str:
+            return "test"
+
+        assert hasattr(my_tool, "__tool_metadata__")
+        assert my_tool.__tool_metadata__.classification.service_domains == [ServiceDomain.MESSAGING]
+
+    def test_decorator_without_metadata_is_backward_compatible(self):
+        """Decorator should work without metadata (existing tools unchanged)."""
+
+        @tool(desc="Test tool")
+        def my_tool() -> str:
+            return "test"
+
+        assert getattr(my_tool, "__tool_metadata__", None) is None
+
+
+class TestToolDefinitionWithMetadata:
+    """Test ToolDefinition includes metadata from decorator."""
+
+    def test_tool_definition_includes_metadata(self):
+        """ToolDefinition.metadata should be populated from decorator."""
+
+        @tool(
+            desc="Send a message",
+            metadata=ToolMetadata(
+                classification=Classification(
+                    service_domains=[ServiceDomain.MESSAGING],
+                ),
+                behavior=Behavior(
+                    operations=[Operation.CREATE],
+                    read_only=False,
+                    destructive=False,
+                    open_world=True,
+                ),
+                extras={"idp": "entraID"},
+            ),
+        )
+        def send_message() -> str:
+            """Send a message."""
+            return "sent"
+
+        definition = ToolCatalog.create_tool_definition(
+            send_message, toolkit_name="TestToolkit", toolkit_version="1.0.0"
+        )
+
+        assert definition.metadata is not None
+        assert definition.metadata.classification.service_domains == [ServiceDomain.MESSAGING]
+        assert definition.metadata.behavior.operations == [Operation.CREATE]
+        assert definition.metadata.extras == {"idp": "entraID"}
+
+    def test_tool_definition_without_metadata_is_none(self):
+        """ToolDefinition.metadata should be None when not provided."""
+
+        @tool(desc="Simple tool")
+        def simple_tool() -> str:
+            """A simple tool."""
+            return "done"
+
+        definition = ToolCatalog.create_tool_definition(
+            simple_tool, toolkit_name="TestToolkit", toolkit_version="1.0.0"
+        )
+
+        assert definition.metadata is None
diff --git a/pyproject.toml b/pyproject.toml index 40e18aeb..67cb7ce7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-mcp" -version = "1.9.0" +version = "1.10.0" description = "Arcade.dev - Tool Calling platform for Agents" readme = "README.md" license = { file = "LICENSE" } @@ -19,8 +19,8 @@ requires-python = ">=3.10" dependencies = [ # CLI dependencies - "arcade-mcp-server>=1.15.1,<2.0.0", - "arcade-core>=4.2.2,<5.0.0", + "arcade-mcp-server>=1.17.0,<2.0.0", + "arcade-core>=4.4.0,<5.0.0", "typer==0.10.0", "rich>=14.0.0,<15.0.0", "Jinja2==3.1.6", @@ -42,12 +42,6 @@ all = [ "numpy>=2.0.0", "scikit-learn>=1.5.0", "pytz>=2024.1", - # mcp server - "arcade-mcp-server>=1.14.0,<2.0.0", - # serve - "arcade-serve>=3.2.0,<4.0.0", - # tdk - "arcade-tdk>=3.4.0,<4.0.0", ] evals = [