### Overview

Major restructuring from the monolithic `arcade-ai` package to a modular library architecture with standardized uv-based dependency management.

### New Package Structure

- **`arcade-tdk`** - Lightweight toolkit development kit (core decorators, auth)
- **`arcade-core`** - Core execution engine and catalog functionality
- **`arcade-serve`** - FastAPI/MCP server components
- **`arcade-ai`** - Meta package that includes CLI functionality. Optionally include evals via the `evals` extra. Optionally include all packages via the `all` extra.

### Key Benefits

- **Lighter Dependencies**: Toolkits now depend only on `arcade-tdk` (~2 deps) vs full `arcade-ai` (~30+ deps)
- **Faster Builds**: uv provides 10-100x faster dependency resolution and installation
- **Better Modularity**: Clear separation of concerns, consumers import only what they need
- **Standard Tooling**: Eliminates custom poetry scripts, uses standard Python packaging

### Migration Impact

- All 20 toolkits converted from poetry → uv with `arcade-tdk` dependencies plus `arcade-ai[evals]` and `arcade-serve` dev dependencies. When developing locally, devs should install toolkits via `make install-local`.
- Modern Python 3.10+ type hints throughout
- Standardized build system with hatchling backend
- Enhanced Makefile with robust toolkit management commands
- Removed `arcade dev` CLI command
- Reduced the number of files created by `arcade new` and added an option to skip generating the tests and evals folders.

This foundation enables faster development cycles and cleaner dependency chains for the growing toolkit ecosystem.

### Todo After this PR is merged

- [ ] Post-merge workflow(s) (release & publish containers, etc)
- [ ] Release order plan. @EricGustin suggests releasing in the following order:
  1. `arcade-core` version 0.1.0
  2. `arcade-serve` version 0.1.0 and `arcade-tdk` version 0.1.0
  3. `arcade-ai` version 2.0.0
  4. Patch release for all toolkits (all changes in toolkits are internal refactors)
- [ ] [Update docs](https://github.com/ArcadeAI/docs/pull/318)

---------

Co-authored-by: Eric Gustin <eric@arcade.dev>
Co-authored-by: Eric Gustin <34000337+EricGustin@users.noreply.github.com>
119 lines
2.8 KiB
Python
119 lines
2.8 KiB
Python
from arcade_evals import (
|
|
BinaryCritic,
|
|
EvalRubric,
|
|
EvalSuite,
|
|
ExpectedToolCall,
|
|
SimilarityCritic,
|
|
tool_eval,
|
|
)
|
|
from arcade_tdk import ToolCatalog
|
|
|
|
import arcade_code_sandbox
|
|
from arcade_code_sandbox.tools.e2b import create_static_matplotlib_chart, run_code
|
|
from arcade_code_sandbox.tools.models import E2BSupportedLanguage
|
|
|
|
# Python snippet used by the "Run code" eval case: it is embedded in the user
# message and is also the expected value of the tool call's "code" argument.
# The snippet must stay valid, correctly indented Python on its own, because
# it is meant to be executed as a program.
merge_sort_code = """
def merge_sort(arr):
    if len(arr) <= 1:
        return arr

    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])

    return merge(left, right)

def merge(left, right):
    result = []
    i, j = 0, 0

    while i < len(left) and j < len(right):
        if left[i] < right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1

    result.extend(left[i:])
    result.extend(right[j:])

    return result

sample_list = ["banana", "apple", "cherry", "date", "elderberry"]

sorted_list = merge_sort(sample_list)
print("Sorted list:", sorted_list)
"""
|
|
|
|
# Python snippet used by the "Create static matplotlib chart" eval case: it is
# embedded in the user message and is also the expected value of the tool
# call's "code" argument. It draws a pie chart and saves it to
# 'fruit_pie_chart.png'.
matplotlib_chart_code = """
import matplotlib.pyplot as plt

labels = ['Apples', 'Bananas', 'Cherries', 'Dates']
sizes = [30, 25, 20, 25]
colors = ['red', 'yellow', 'purple', 'brown']

plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)

plt.axis('equal')

plt.title('Fruit Distribution')

plt.savefig('fruit_pie_chart.png')
"""
|
|
|
|
# Evaluation rubric handed to the EvalSuite built in this module.
# NOTE(review): presumably a score below 0.85 counts as a failure and a score
# below 0.95 as a warning — confirm against the arcade_evals documentation.
rubric = EvalRubric(fail_threshold=0.85, warn_threshold=0.95)
|
|
|
|
|
|
# Tool catalog handed to the EvalSuite built in this module. The whole
# arcade_code_sandbox package is passed to add_module (presumably so that all
# of its tools get registered — verify against ToolCatalog.add_module).
catalog = ToolCatalog()
catalog.add_module(arcade_code_sandbox)
|
|
|
|
|
|
@tool_eval()
def code_sandbox_eval_suite():
    """Assemble the evaluation suite for the code_sandbox tools.

    Two cases are added to the suite:

    * "Run code" expects a ``run_code`` call carrying the merge sort snippet
      and ``E2BSupportedLanguage.PYTHON``.
    * "Create static matplotlib chart" expects a
      ``create_static_matplotlib_chart`` call carrying the pie-chart snippet.

    Returns:
        The ``EvalSuite`` with both cases added, built on the module-level
        ``catalog`` and ``rubric``.
    """
    suite = EvalSuite(
        name="code_sandbox Tools Evaluation",
        system_message="You are an AI assistant with access to code_sandbox tools. Use them to help the user with their tasks.",
        catalog=catalog,
        rubric=rubric,
    )

    # Case 1: the "code" argument is checked with a SimilarityCritic
    # (weight 0.8) and the "language" argument with a BinaryCritic (weight 0.2).
    expected_run_call = ExpectedToolCall(
        func=run_code,
        args={"code": merge_sort_code, "language": E2BSupportedLanguage.PYTHON},
    )
    run_code_critics = [
        SimilarityCritic(critic_field="code", weight=0.8),
        BinaryCritic(critic_field="language", weight=0.2),
    ]
    suite.add_case(
        name="Run code",
        user_message=f"Can you please run my merge sort algo?\n\n{merge_sort_code}",
        expected_tool_calls=[expected_run_call],
        critics=run_code_critics,
    )

    # Case 2: only the "code" argument is checked, so its critic carries the
    # full weight of 1.0.
    expected_chart_call = ExpectedToolCall(
        func=create_static_matplotlib_chart,
        args={"code": matplotlib_chart_code},
    )
    chart_critics = [SimilarityCritic(critic_field="code", weight=1.0)]
    suite.add_case(
        name="Create static matplotlib chart",
        user_message=f"Run this code:\n\n{matplotlib_chart_code}",
        expected_tool_calls=[expected_chart_call],
        critics=chart_critics,
    )

    return suite
|