arcade-mcp/toolkits/math/evals/eval_math_tools.py
Sam Partee 2eb46a3a98
Client Fixes and LangGraph Examples (#50)
This PR includes several improvements to the Arcade client and adds
LangGraph examples:

1. Enhanced error handling in the Arcade client:
   - Improved HTTP error handling in `BaseArcadeClient`
   - Simplified request methods in `SyncArcadeClient` and
     `AsyncArcadeClient`

2. Updated `ToolResource` class:
   - Changed base path from `/v1/tool` to `/v1/tools`
   - Added `tool_version` parameter to `authorize` method

3. Improved Toolkit discovery:
   - Updated `find_all_arcade_toolkits` to search only in the current
     Python interpreter's site-packages

4. Added LangGraph examples:
   - New `langgraph_auth.py` example demonstrating Gmail authentication
   - New `langgraph_with_tool_exec.py` example showing tool execution
     within a LangGraph

5. Minor updates:
   - Changed default `BASE_URL` to `https://api.arcade.com/`
   - Updated import error message for eval dependencies

---------

Co-authored-by: Nate Barbettini <nate@arcade-ai.com>
2024-09-24 10:13:45 -07:00

70 lines
1.6 KiB
Python

import arcade_math
from arcade_math.tools.arithmetic import add, sqrt
from arcade.core.catalog import ToolCatalog
from arcade.sdk.eval import (
BinaryCritic,
EvalRubric,
EvalSuite,
tool_eval,
)
# Scoring thresholds shared by the suite and every case defined below.
rubric = EvalRubric(fail_threshold=0.85, warn_threshold=0.95)

# Catalog of tools made available to the evaluation, loaded from the
# arcade_math package.
catalog = ToolCatalog()
catalog.add_module(arcade_math)
@tool_eval()
def math_eval_suite():
    """Build the evaluation suite for the math tools.

    Creates an ``EvalSuite`` bound to the module-level ``catalog`` and
    ``rubric`` and registers two cases on it:

    * an addition prompt that expects a call to ``add`` with ``a`` and ``b``
    * a square-root prompt that expects a call to ``sqrt`` with ``a``

    Returns:
        The populated ``EvalSuite`` instance.
    """
    math_suite = EvalSuite(
        name="Math Tools Evaluation",
        system_message="You are an AI assistant with access to math tools. Use them to help the user with their math-related tasks.",
        catalog=catalog,
        rubric=rubric,
    )

    # Case 1: addition. Each argument gets its own critic, weighted equally.
    addition_call = (add, {"a": 12345, "b": 987654321})
    addition_critics = [
        # TODO: weight should be optional
        BinaryCritic(critic_field="a", weight=0.5),
        BinaryCritic(critic_field="b", weight=0.5),
    ]
    math_suite.add_case(
        name="Add two large numbers",
        user_message="Add 12345 and 987654321",
        expected_tool_calls=[addition_call],
        rubric=rubric,
        critics=addition_critics,
    )

    # Case 2: square root. A single argument, so one critic carries the
    # full weight.
    sqrt_call = (sqrt, {"a": 3224990521})
    sqrt_critics = [BinaryCritic(critic_field="a", weight=1.0)]
    math_suite.add_case(
        name="Take the square root of a large number",
        user_message="What is the square root of 3224990521?",
        expected_tool_calls=[sqrt_call],
        rubric=rubric,
        critics=sqrt_critics,
    )

    return math_suite