### Overview

Major restructuring from the monolithic `arcade-ai` package to a modular library architecture with standardized uv-based dependency management.

### New Package Structure

- **`arcade-tdk`** - Lightweight toolkit development kit (core decorators, auth)
- **`arcade-core`** - Core execution engine and catalog functionality
- **`arcade-serve`** - FastAPI/MCP server components
- **`arcade-ai`** - Meta package that includes CLI functionality. Optionally include evals via the `evals` extra. Optionally include all packages via the `all` extra.

### Key Benefits

- **Lighter Dependencies**: Toolkits now depend only on `arcade-tdk` (~2 deps) vs. the full `arcade-ai` (~30+ deps)
- **Faster Builds**: uv provides 10-100x faster dependency resolution and installation
- **Better Modularity**: Clear separation of concerns; consumers import only what they need
- **Standard Tooling**: Eliminates custom poetry scripts and uses standard Python packaging

### Migration Impact

- All 20 toolkits converted from poetry → uv with `arcade-tdk` dependencies plus `arcade-ai[evals]` and `arcade-serve` dev dependencies. When developing locally, devs should install toolkits via `make install-local`.
- Modern Python 3.10+ type hints throughout
- Standardized build system with hatchling backend
- Enhanced Makefile with robust toolkit management commands
- Removed the `arcade dev` CLI command
- Reduced the number of files created by `arcade new` and added an option to skip generating the tests and evals folders.

This foundation enables faster development cycles and cleaner dependency chains for the growing toolkit ecosystem.

### Todo after this PR is merged

- [ ] Post-merge workflow(s) (release & publish containers, etc.)
- [ ] Release order plan. @EricGustin suggests releasing in the following order:
  1. `arcade-core` version 0.1.0
  2. `arcade-serve` version 0.1.0 and `arcade-tdk` version 0.1.0
  3. `arcade-ai` version 2.0.0
  4. Patch release for all toolkits (all changes in toolkits are internal refactors)
- [ ] [Update docs](https://github.com/ArcadeAI/docs/pull/318)

---------

Co-authored-by: Eric Gustin <eric@arcade.dev>
Co-authored-by: Eric Gustin <34000337+EricGustin@users.noreply.github.com>
131 lines
3.8 KiB
Python
131 lines
3.8 KiB
Python
from arcade_evals import (
|
|
BinaryCritic,
|
|
EvalRubric,
|
|
EvalSuite,
|
|
ExpectedToolCall,
|
|
tool_eval,
|
|
)
|
|
from arcade_tdk import ToolCatalog
|
|
|
|
import arcade_math
|
|
from arcade_math.tools.arithmetic import (
|
|
add,
|
|
divide,
|
|
mod,
|
|
multiply,
|
|
subtract,
|
|
sum_list,
|
|
sum_range,
|
|
)
|
|
from arcade_math.tools.exponents import (
|
|
log,
|
|
power,
|
|
)
|
|
from arcade_math.tools.miscellaneous import (
|
|
abs_val,
|
|
factorial,
|
|
sqrt,
|
|
)
|
|
from arcade_math.tools.rational import (
|
|
gcd,
|
|
lcm,
|
|
)
|
|
from arcade_math.tools.rounding import (
|
|
ceil,
|
|
floor,
|
|
round_num,
|
|
)
|
|
from arcade_math.tools.statistics import (
|
|
avg,
|
|
median,
|
|
)
|
|
from arcade_math.tools.trigonometry import (
|
|
deg_to_rad,
|
|
rad_to_deg,
|
|
)
|
|
|
|
# Evaluation rubric: the fail/warn thresholds handed to the suite and to each case below.
rubric = EvalRubric(fail_threshold=0.85, warn_threshold=0.95)

# Tool catalog given to the eval suite; the arcade_math module is registered in it.
catalog = ToolCatalog()
catalog.add_module(arcade_math)
|
|
|
|
|
|
@tool_eval()
def math_eval_suite():
    """Build the evaluation suite for the math toolkit.

    One case is registered per math tool. Each case uses a natural-language
    prompt as both its name and its user message, and expects a single call to
    the matching tool with the listed arguments. Every expected argument gets
    its own ``BinaryCritic``; the weights within a case are split evenly
    (``1.0 / num_params``) so they sum to 1.

    Returns:
        The ``EvalSuite`` populated with all math tool cases.
    """
    suite = EvalSuite(
        name="Math Tools Evaluation",
        system_message=(
            "You're an AI assistant with access to math tools. "
            "Use them to help the user with their math-related tasks."
        ),
        catalog=catalog,
        rubric=rubric,
    )

    list_param = ["1", "2", "3", "4", "5"]
    # Rendered once; the n-ary prompts below all embed the same number list.
    numbers_text = " ".join(list_param)

    # Each entry: (tool function, prompt template, expected tool arguments).
    # Templates are filled from the argument dict via str.format.
    funcs_to_expression_and_params = [
        # unary
        (sqrt, "What's the square root of {a}?", {"a": "25"}),
        (abs_val, "What's the absolute value of {a}?", {"a": "-10"}),
        (factorial, "What's the factorial of {a}?", {"a": "5"}),
        (deg_to_rad, "Convert {degrees} from degrees to radians", {"degrees": "180"}),
        (rad_to_deg, "Convert {radians} from radians to degrees", {"radians": "3.14"}),
        (ceil, "Compute the ceiling of {a}", {"a": "3.14"}),
        (floor, "Compute the floor of {a}", {"a": "3.14"}),
        # binary
        (add, "Add {a} and {b}", {"a": "12345", "b": "987654321"}),
        (subtract, "Subtract {b} from {a}", {"a": "987654321", "b": "12345"}),
        (multiply, "Multiply {a} and {b}", {"a": "12345", "b": "567890"}),
        (divide, "What is {a} divided by {b}?", {"a": "1234123479", "b": "123"}),
        (
            sum_range,
            "What's the sum of all numbers from {start} to {end}?",
            {"start": "10", "end": "345"},
        ),
        (mod, "What's the remainder of dividing {a} by {b}?", {"a": "234", "b": "17"}),
        (power, "Raise {a} to the power of {b}", {"a": "2", "b": "8"}),
        (log, "What's the logarithm of {a} with base {base}?", {"a": "8", "base": "2"}),
        (
            round_num,
            "Round {value} to {ndigits} decimal places",
            {"value": "12.23746234", "ndigits": "3"},
        ),
        (gcd, "Find the greatest common divisor of {a} and {b}", {"a": "50", "b": "10"}),
        (lcm, "Find the least common multiple of {a} and {b}", {"a": "7", "b": "13"}),
        # n-ary
        (
            sum_list,
            f"Calculate the sum of these numbers: {numbers_text}",
            {"numbers": list_param},
        ),
        (
            avg,
            f"Find the average of these numbers: {numbers_text}",
            {"numbers": list_param},
        ),
        (
            median,
            f"Find the median of these numbers: {numbers_text}",
            {"numbers": list_param},
        ),
    ]

    for func, expression, params in funcs_to_expression_and_params:
        # The n-ary prompts contain no placeholders, so format() leaves them as-is.
        parametrized_expression = expression.format(**params)
        num_params = len(params)
        suite.add_case(
            name=parametrized_expression,
            user_message=parametrized_expression,
            expected_tool_calls=[
                ExpectedToolCall(
                    func=func,
                    args=params,
                )
            ],
            rubric=rubric,
            # One critic per expected argument, equally weighted.
            critics=[BinaryCritic(critic_field=param, weight=1.0 / num_params) for param in params],
        )

    return suite
|