arcade-mcp/arcade/pyproject.toml
Sam Partee db948125d5
Tool Evaluation SDK (#35)
1. New Eval SDK (`arcade/sdk/eval.py`):
- Introduces `EvalSuite`, `EvalCase`, and `EvalRubric` classes for
structured evaluation.
- Implements various Critic classes (Binary, Numeric, Similarity) for
flexible scoring.
- Adds a `tool_eval` decorator for easy integration with existing tools.

2. CLI Integration (`arcade/cli/main.py` and `arcade/cli/utils.py`):
   - Adds an `evals` command to run evaluation suites from the CLI.
   - Implements result display functionality for evaluation outcomes.

3. Toolkit Updates:
- Adds evaluation scripts for Gmail
([toolkits/gmail/evals/eval_gmail_tools.py](file:///Users/spartee/Dropbox/Arcade/platform/Team/arcade-ai/toolkits/gmail/evals/eval_gmail_tools.py#1%2C1-1%2C1))
and Slack
([toolkits/slack/evals/eval_slack_messaging.py](file:///Users/spartee/Dropbox/Arcade/platform/Team/arcade-ai/toolkits/slack/evals/eval_slack_messaging.py#1%2C1-1%2C1))
toolkits.
- Demonstrates practical usage of the Eval SDK with real-world
scenarios.

4. Miscellaneous:
- Updates `arcade/cli/new.py` to optionally generate an `evals`
directory for new toolkits.

---------

Co-authored-by: Nate Barbettini <nate@arcade-ai.com>
2024-09-19 03:36:44 -07:00

133 lines
2.4 KiB
TOML

# PEP 517 build configuration: the project is built with poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

# Distribution metadata read by Poetry.
[tool.poetry]
name = "arcade-ai"
version = "0.1.0"
description = ""
authors = ["Arcade AI <sam@arcade-ai.com>"]
# Ship the `arcade` package located in the same directory as this file.
packages = [{ include = "arcade", from = "." }]
# Runtime requirements; `python` first, then packages in alphabetical order.
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
# TODO: relax to an earlier version that still has what we need
openai = "^1.36.0"
pydantic = "^2.7.0"
pydantic-settings = "^2.2.1"
pyjwt = "^2.8.0"
# TODO: is this really needed?
requests = "^2.26.0"
rich = "^13.7.1"
toml = "^0.10.2"
tomlkit = "^0.12.4"
typer = "^0.9.0"
# Dependency group `fastapi`: pulls in the FastAPI web framework.
[tool.poetry.group.fastapi.dependencies]
fastapi = "^0.110.0"
# Dependency group `flask`: pulls in the Flask web framework.
[tool.poetry.group.flask.dependencies]
flask = "^3.0.3"
# Dependency group `dev`: tooling for tests, linting/typing, docs and local serving.
[tool.poetry.group.dev.dependencies]
# Test runners and plugins
pytest = "^8.1.1"
pytest-asyncio = "^0.23.7"
pytest-cov = "^4.0.0"
tox = "^4.11.1"
# Type checking and commit hooks
mypy = "^1.5.1"
types-toml = "^0.10.8"
pre-commit = "^3.4.0"
# Documentation site
mkdocs = ">=1.5.2"
mkdocs-material = ">=9.3.0"
mkdocstrings = { version = ">=0.23.1", extras = ["python"] }
# ASGI server
uvicorn = "^0.22.0"
# Dependency group `evals`: numerical packages
# (presumably used by the evaluation SDK — TODO confirm).
[tool.poetry.group.evals.dependencies]
numpy = "^2.0.0"
scikit-learn = "^1.5.0"
scipy = "^1.14.0"
# Console entry point: installs an `arcade` command bound to `cli` in `arcade.cli.main`.
[tool.poetry.scripts]
arcade = "arcade.cli.main:cli"
# Static type checking for the `arcade` package.
[tool.mypy]
files = ["arcade"]
python_version = "3.10"
# Flags are native TOML booleans (lowercase), not quoted strings.
disallow_untyped_defs = true
disallow_any_unimported = true
no_implicit_optional = true
check_untyped_defs = true
warn_return_any = true
warn_unused_ignores = true
show_error_codes = true
ignore_missing_imports = true
# pytest looks for tests under the `tests` directory.
[tool.pytest.ini_options]
testpaths = ["tests"]
# Ruff linter configuration.
[tool.ruff]
# Must match the minimum supported interpreter: the Poetry dependencies
# declare python ">=3.10,<4.0" and mypy is pinned to python_version "3.10".
target-version = "py310"
line-length = 100
# Apply auto-fixes when running the linter.
fix = true
select = [
    # flake8-2020
    "YTT",
    # flake8-bandit
    "S",
    # flake8-bugbear
    "B",
    # flake8-builtins
    "A",
    # flake8-comprehensions
    "C4",
    # flake8-debugger
    "T10",
    # flake8-simplify
    "SIM",
    # isort
    "I",
    # mccabe
    "C90",
    # pycodestyle
    "E", "W",
    # pyflakes
    "F",
    # pygrep-hooks
    "PGH",
    # pyupgrade
    "UP",
    # ruff
    "RUF",
    # tryceratops
    "TRY",
]
# TODO work to remove these
ignore = [
    # LineTooLong
    "E501",
    # DoNotAssignLambda
    "E731",
    # raise from (cli specific)
    "TRY200",
    # Depends function in arg string
    "B008",
    # raise from (cli specific)
    "B904",
    # long message exceptions
    "TRY003",
]
# Ruff sub-tables are kept together, directly after the main ruff table.
[tool.ruff.format]
preview = true

# Test files may use bare `assert` statements (S101).
[tool.ruff.per-file-ignores]
"tests/*" = ["S101"]

# Coverage measurement: branch coverage over the `arcade` package.
[tool.coverage.run]
source = ["arcade"]
branch = true

[tool.coverage.report]
skip_empty = true