Improve arcade new (#104)

# PR Description
This PR improves the CLI command `arcade new` in multiple ways.
1. When prompting the developer if they want to create a test/evals
folder, the default was printed twice. This is now fixed.
2. Whether to generate a test/eval directory, the user must enter "yes".
This PR relaxes this to accept other variations.
3. Creates an eval suite instead of an empty folder.
4. Prevents a developer from overwriting an existing toolkit.
This commit is contained in:
Eric Gustin 2024-10-11 16:22:10 -07:00 committed by GitHub
parent ff092ac303
commit 74a47f1062
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -21,22 +21,26 @@ def ask_question(question: str, default: Optional[str] = None) -> str:
"""
Ask a question via input() and return the answer.
"""
if default:
question = f"{question} [{default}]"
answer = typer.prompt(question, default=default)
if not answer and default:
return default
return str(answer)
def create_directory(path: str) -> None:
def create_directory(path: str) -> bool:
"""
Create a directory if it doesn't exist.
Returns True if the directory was created, False if failed to create.
"""
try:
os.makedirs(path, exist_ok=True)
os.makedirs(path, exist_ok=False)
except FileExistsError:
console.print(f"[red]Directory '{path}' already exists.[/red]")
return False
except Exception as e:
console.print(f"[red]Failed to create directory {path}: {e}[/red]")
return False
return True
def create_file(path: str, content: str) -> None:
@ -96,14 +100,20 @@ def create_new_toolkit(directory: str) -> None:
author_email = ask_question("Author's email?")
author = f"{author_name} <{author_email}>"
generate_test_dir = ask_question("Generate test directory? (yes/no)", "yes") == "yes"
generate_eval_dir = ask_question("Generate eval directory? (yes/no)", "yes") == "yes"
yes_options = ["yes", "y", "ye", "yea", "yeah", "true"]
generate_test_dir = (
ask_question("Generate test directory? (yes/no)", "yes").lower() in yes_options
)
generate_eval_dir = (
ask_question("Generate eval directory? (yes/no)", "yes").lower() in yes_options
)
top_level_dir = os.path.join(directory, name)
toolkit_dir = os.path.join(directory, name, toolkit_name)
# Create the top level toolkit directory
create_directory(top_level_dir)
if not create_directory(top_level_dir):
return
# Create the toolkit directory
create_directory(toolkit_dir)
@ -123,13 +133,14 @@ def create_new_toolkit(directory: str) -> None:
os.path.join(toolkit_dir, "tools", "hello.py"),
dedent(
f"""
from typing import Annotated
from arcade.sdk import tool
@tool
def hello() -> str:
def hello(name: Annotated[str, "The name of the person to greet"]) -> str:
{docstring}
return "Hello, World!"
return "Hello, " + name + "!"
"""
).strip(),
)
@ -141,8 +152,90 @@ def create_new_toolkit(directory: str) -> None:
if generate_test_dir:
create_directory(os.path.join(top_level_dir, "tests"))
# Create the __init__.py file in the tests directory
create_file(os.path.join(top_level_dir, "tests", "__init__.py"), "")
# Create the test_hello.py file in the tests directory
stripped_toolkit_name = toolkit_name.replace("arcade_", "")
create_file(
os.path.join(top_level_dir, "tests", f"test_{stripped_toolkit_name}.py"),
dedent(
f"""
import pytest
from arcade.sdk.error import ToolExecutionError
from {toolkit_name}.tools.hello import hello
def test_hello():
assert hello("developer") == "Hello, developer!"
def test_hello_raises_error():
with pytest.raises(ToolExecutionError):
hello(1)
"""
).strip(),
)
# If the user wants to generate an eval directory
if generate_eval_dir:
create_directory(os.path.join(top_level_dir, "evals"))
# Create the eval_hello.py file
stripped_toolkit_name = toolkit_name.replace("arcade_", "")
create_file(
os.path.join(top_level_dir, "evals", "eval_hello.py"),
dedent(
f"""
import {toolkit_name}
from {toolkit_name}.tools.hello import hello
from arcade.core.catalog import ToolCatalog
from arcade.sdk.eval import (
EvalRubric,
EvalSuite,
SimilarityCritic,
tool_eval,
)
# Evaluation rubric
rubric = EvalRubric(
fail_threshold=0.85,
warn_threshold=0.95,
)
catalog = ToolCatalog()
catalog.add_module({toolkit_name})
@tool_eval()
def {stripped_toolkit_name}_eval_suite():
suite = EvalSuite(
name="{stripped_toolkit_name} Tools Evaluation",
system_message="You are an AI assistant with access to {stripped_toolkit_name} tools. Use them to help the user with their tasks.",
catalog=catalog,
rubric=rubric,
)
suite.add_case(
name="Saying hello",
user_message="Say hello to the developer!!!!",
expected_tool_calls=[
(
hello,
{{
"name": "developer"
}}
)
],
rubric=rubric,
critics=[
SimilarityCritic(critic_field="name", weight=0.5),
],
)
return suite
"""
).strip(),
)
console.print(f"[green]Toolkit {toolkit_name} has been created.[/green]")