Improve arcade new (#104)
# PR Description This PR improves the CLI command `arcade new` in multiple ways. 1. When prompting the developer if they want to create a test/evals folder, the default was printed twice. This is now fixed. 2. Whether to generate a test/eval directory, the user must enter "yes". This PR relaxes this to accept other variations. 3. Creates an eval suite instead of an empty folder. 4. Prevents a developer from overwriting an existing toolkit.
This commit is contained in:
parent
ff092ac303
commit
74a47f1062
1 changed files with 102 additions and 9 deletions
|
|
@ -21,22 +21,26 @@ def ask_question(question: str, default: Optional[str] = None) -> str:
|
|||
"""
|
||||
Ask a question via input() and return the answer.
|
||||
"""
|
||||
if default:
|
||||
question = f"{question} [{default}]"
|
||||
answer = typer.prompt(question, default=default)
|
||||
if not answer and default:
|
||||
return default
|
||||
return str(answer)
|
||||
|
||||
|
||||
def create_directory(path: str) -> None:
|
||||
def create_directory(path: str) -> bool:
|
||||
"""
|
||||
Create a directory if it doesn't exist.
|
||||
Returns True if the directory was created, False if failed to create.
|
||||
"""
|
||||
try:
|
||||
os.makedirs(path, exist_ok=True)
|
||||
os.makedirs(path, exist_ok=False)
|
||||
except FileExistsError:
|
||||
console.print(f"[red]Directory '{path}' already exists.[/red]")
|
||||
return False
|
||||
except Exception as e:
|
||||
console.print(f"[red]Failed to create directory {path}: {e}[/red]")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def create_file(path: str, content: str) -> None:
|
||||
|
|
@ -96,14 +100,20 @@ def create_new_toolkit(directory: str) -> None:
|
|||
author_email = ask_question("Author's email?")
|
||||
author = f"{author_name} <{author_email}>"
|
||||
|
||||
generate_test_dir = ask_question("Generate test directory? (yes/no)", "yes") == "yes"
|
||||
generate_eval_dir = ask_question("Generate eval directory? (yes/no)", "yes") == "yes"
|
||||
yes_options = ["yes", "y", "ye", "yea", "yeah", "true"]
|
||||
generate_test_dir = (
|
||||
ask_question("Generate test directory? (yes/no)", "yes").lower() in yes_options
|
||||
)
|
||||
generate_eval_dir = (
|
||||
ask_question("Generate eval directory? (yes/no)", "yes").lower() in yes_options
|
||||
)
|
||||
|
||||
top_level_dir = os.path.join(directory, name)
|
||||
toolkit_dir = os.path.join(directory, name, toolkit_name)
|
||||
|
||||
# Create the top level toolkit directory
|
||||
create_directory(top_level_dir)
|
||||
if not create_directory(top_level_dir):
|
||||
return
|
||||
|
||||
# Create the toolkit directory
|
||||
create_directory(toolkit_dir)
|
||||
|
|
@ -123,13 +133,14 @@ def create_new_toolkit(directory: str) -> None:
|
|||
os.path.join(toolkit_dir, "tools", "hello.py"),
|
||||
dedent(
|
||||
f"""
|
||||
from typing import Annotated
|
||||
from arcade.sdk import tool
|
||||
|
||||
@tool
|
||||
def hello() -> str:
|
||||
def hello(name: Annotated[str, "The name of the person to greet"]) -> str:
|
||||
{docstring}
|
||||
|
||||
return "Hello, World!"
|
||||
return "Hello, " + name + "!"
|
||||
"""
|
||||
).strip(),
|
||||
)
|
||||
|
|
@ -141,8 +152,90 @@ def create_new_toolkit(directory: str) -> None:
|
|||
if generate_test_dir:
|
||||
create_directory(os.path.join(top_level_dir, "tests"))
|
||||
|
||||
# Create the __init__.py file in the tests directory
|
||||
create_file(os.path.join(top_level_dir, "tests", "__init__.py"), "")
|
||||
|
||||
# Create the test_hello.py file in the tests directory
|
||||
stripped_toolkit_name = toolkit_name.replace("arcade_", "")
|
||||
create_file(
|
||||
os.path.join(top_level_dir, "tests", f"test_{stripped_toolkit_name}.py"),
|
||||
dedent(
|
||||
f"""
|
||||
import pytest
|
||||
from arcade.sdk.error import ToolExecutionError
|
||||
from {toolkit_name}.tools.hello import hello
|
||||
|
||||
def test_hello():
|
||||
assert hello("developer") == "Hello, developer!"
|
||||
|
||||
def test_hello_raises_error():
|
||||
with pytest.raises(ToolExecutionError):
|
||||
hello(1)
|
||||
"""
|
||||
).strip(),
|
||||
)
|
||||
|
||||
# If the user wants to generate an eval directory
|
||||
if generate_eval_dir:
|
||||
create_directory(os.path.join(top_level_dir, "evals"))
|
||||
|
||||
# Create the eval_hello.py file
|
||||
stripped_toolkit_name = toolkit_name.replace("arcade_", "")
|
||||
create_file(
|
||||
os.path.join(top_level_dir, "evals", "eval_hello.py"),
|
||||
dedent(
|
||||
f"""
|
||||
import {toolkit_name}
|
||||
from {toolkit_name}.tools.hello import hello
|
||||
|
||||
from arcade.core.catalog import ToolCatalog
|
||||
from arcade.sdk.eval import (
|
||||
EvalRubric,
|
||||
EvalSuite,
|
||||
SimilarityCritic,
|
||||
tool_eval,
|
||||
)
|
||||
|
||||
# Evaluation rubric
|
||||
rubric = EvalRubric(
|
||||
fail_threshold=0.85,
|
||||
warn_threshold=0.95,
|
||||
)
|
||||
|
||||
|
||||
catalog = ToolCatalog()
|
||||
catalog.add_module({toolkit_name})
|
||||
|
||||
|
||||
@tool_eval()
|
||||
def {stripped_toolkit_name}_eval_suite():
|
||||
suite = EvalSuite(
|
||||
name="{stripped_toolkit_name} Tools Evaluation",
|
||||
system_message="You are an AI assistant with access to {stripped_toolkit_name} tools. Use them to help the user with their tasks.",
|
||||
catalog=catalog,
|
||||
rubric=rubric,
|
||||
)
|
||||
|
||||
suite.add_case(
|
||||
name="Saying hello",
|
||||
user_message="Say hello to the developer!!!!",
|
||||
expected_tool_calls=[
|
||||
(
|
||||
hello,
|
||||
{{
|
||||
"name": "developer"
|
||||
}}
|
||||
)
|
||||
],
|
||||
rubric=rubric,
|
||||
critics=[
|
||||
SimilarityCritic(critic_field="name", weight=0.5),
|
||||
],
|
||||
)
|
||||
|
||||
return suite
|
||||
"""
|
||||
).strip(),
|
||||
)
|
||||
|
||||
console.print(f"[green]Toolkit {toolkit_name} has been created.[/green]")
|
||||
|
|
|
|||
Loading…
Reference in a new issue