### Adds the following tools to the Github Toolkit:
1. CreateIssueComment
2. SetStarred
3. CountStargazers
4. ListOrgRepositories
5. GetRepository
6. ListRepositoryActivities
7. ListReviewCommentsInARepository
8. ListPullRequests
9. GetPullRequest
10. UpdatePullRequest
11. ListPullRequestCommits
12. CreateReplyForReviewComment
13. ListReviewCommentsOnPullRequest
14. CreateReviewComment
Adds evals for all of these tools and unit tests.
---------
Co-authored-by: Sam Partee <sam@arcade-ai.com>
74 lines
1.9 KiB
Python
74 lines
1.9 KiB
Python
import arcade_github
|
|
from arcade_github.tools.activity import set_starred
|
|
|
|
from arcade.core.catalog import ToolCatalog
|
|
from arcade.sdk.eval import (
|
|
BinaryCritic,
|
|
EvalRubric,
|
|
EvalSuite,
|
|
tool_eval,
|
|
)
|
|
|
|
# Evaluation rubric
|
|
rubric = EvalRubric(
|
|
fail_threshold=0.9,
|
|
warn_threshold=0.95,
|
|
)
|
|
|
|
catalog = ToolCatalog()
|
|
# Register the GitHub tools
|
|
catalog.add_module(arcade_github)
|
|
|
|
|
|
@tool_eval()
|
|
def github_activity_eval_suite() -> EvalSuite:
|
|
"""Evaluation suite for GitHub Activity tools."""
|
|
suite = EvalSuite(
|
|
name="GitHub Activity Tools Evaluation Suite",
|
|
system_message="You are an AI assistant that helps users interact with GitHub repositories using the provided tools.",
|
|
catalog=catalog,
|
|
rubric=rubric,
|
|
)
|
|
|
|
# Set Starred
|
|
suite.add_case(
|
|
name="Star a repository",
|
|
user_message="Star the test repository that is owned by ArcadeAI.",
|
|
expected_tool_calls=[
|
|
(
|
|
set_starred,
|
|
{
|
|
"owner": "ArcadeAI",
|
|
"name": "test",
|
|
"starred": True,
|
|
},
|
|
)
|
|
],
|
|
critics=[
|
|
BinaryCritic(critic_field="owner", weight=0.3),
|
|
BinaryCritic(critic_field="name", weight=0.3),
|
|
BinaryCritic(critic_field="starred", weight=0.4),
|
|
],
|
|
)
|
|
|
|
suite.add_case(
|
|
name="Unstar a repository",
|
|
user_message="Unstar the ArcadeAI/test repository.",
|
|
expected_tool_calls=[
|
|
(
|
|
set_starred,
|
|
{
|
|
"owner": "ArcadeAI",
|
|
"name": "test",
|
|
"starred": False,
|
|
},
|
|
)
|
|
],
|
|
critics=[
|
|
BinaryCritic(critic_field="owner", weight=0.3),
|
|
BinaryCritic(critic_field="name", weight=0.3),
|
|
BinaryCritic(critic_field="starred", weight=0.4),
|
|
],
|
|
)
|
|
|
|
return suite
|