### Adds the following tools to the GitHub Toolkit:
1. CreateIssueComment
2. SetStarred
3. CountStargazers
4. ListOrgRepositories
5. GetRepository
6. ListRepositoryActivities
7. ListReviewCommentsInARepository
8. ListPullRequests
9. GetPullRequest
10. UpdatePullRequest
11. ListPullRequestCommits
12. CreateReplyForReviewComment
13. ListReviewCommentsOnPullRequest
14. CreateReviewComment
Adds evals for all of these tools, along with unit tests.
---------
Co-authored-by: Sam Partee <sam@arcade-ai.com>
245 lines
8.7 KiB
Python
245 lines
8.7 KiB
Python
import arcade_github
from arcade_github.tools.models import DiffSide, ReviewCommentSubjectType
from arcade_github.tools.pull_requests import (
    create_reply_for_review_comment,
    create_review_comment,
    get_pull_request,
    list_pull_request_commits,
    list_pull_requests,
    list_review_comments_on_pull_request,
    update_pull_request,
)

from arcade.core.catalog import ToolCatalog
from arcade.sdk.eval import (
    BinaryCritic,
    EvalRubric,
    EvalSuite,
    SimilarityCritic,
    tool_eval,
)

# Rubric handed to the eval suite defined in this module.
rubric = EvalRubric(fail_threshold=0.9, warn_threshold=0.95)

# Tool catalog handed to the eval suite; it is populated with the GitHub
# tools from the arcade_github module.
catalog = ToolCatalog()
catalog.add_module(arcade_github)


@tool_eval()
def github_pull_requests_eval_suite() -> EvalSuite:
    """Build the evaluation suite for the GitHub pull request tools.

    Every case pairs one user request with the single tool call expected in
    response and a list of critics, each tied to one argument of that call.
    Cases are registered in the order they are listed.

    Returns:
        EvalSuite: The suite with all pull request cases added.
    """
    # Arguments shared across cases: all of them address ArcadeAI/test, and
    # all but the listing case address pull request 72.
    repo_args = {"owner": "ArcadeAI", "repo": "test"}
    pr_args = {**repo_args, "pull_number": 72}

    cases = [
        # List Pull Requests
        # NOTE(review): these critic weights total 0.7, while most other cases
        # total 1.0 -- confirm this is intended.
        {
            "name": "List all open pull requests",
            "user_message": "List all open pull requests in the test repository under the ArcadeAI account that are proposing to merge into main.",
            "expected_tool_calls": [
                (
                    list_pull_requests,
                    {**repo_args, "state": "open", "base": "main"},
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="state", weight=0.2),
                BinaryCritic(critic_field="base", weight=0.1),
            ],
        },
        # Get Pull Request
        {
            "name": "Get details of a pull request",
            "user_message": "Get diff of pull request #72 in the 'ArcadeAI/test' repository. Include all the data that is available in your response.",
            "expected_tool_calls": [
                (
                    get_pull_request,
                    {
                        **pr_args,
                        "include_diff_content": True,
                        "include_extra_data": True,
                    },
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="pull_number", weight=0.3),
                BinaryCritic(critic_field="include_extra_data", weight=0.1),
                BinaryCritic(critic_field="include_diff_content", weight=0.2),
            ],
        },
        # Update Pull Request
        {
            "name": "Update a pull request",
            "user_message": "Update the title of pull request #72 in the 'ArcadeAI/test' repository to 'Updated Title'.",
            "expected_tool_calls": [
                (
                    update_pull_request,
                    {**pr_args, "title": "Updated Title"},
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="pull_number", weight=0.3),
                BinaryCritic(critic_field="title", weight=0.3),
            ],
        },
        # List Pull Request Commits
        # NOTE(review): these critic weights total 0.7, while most other cases
        # total 1.0 -- confirm this is intended.
        {
            "name": "List commits on a pull request",
            "user_message": "List all commits for PR 72 in the test repository under ArcadeAI.",
            "expected_tool_calls": [
                (
                    list_pull_request_commits,
                    {**pr_args},
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="pull_number", weight=0.3),
            ],
        },
        # Create Reply for Review Comment
        {
            "name": "Create a reply to a review comment",
            "user_message": "Create a reply to the review comment 1778019974 in 'ArcadeAI/test' for pr 72 saying 'Thanks for the suggestion.'",
            "expected_tool_calls": [
                (
                    create_reply_for_review_comment,
                    {
                        **pr_args,
                        "comment_id": 1778019974,
                        "body": "Thanks for the suggestion.",
                    },
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="pull_number", weight=0.2),
                BinaryCritic(critic_field="comment_id", weight=0.2),
                SimilarityCritic(critic_field="body", weight=0.2),
            ],
        },
        # List Review Comments on Pull Request
        {
            "name": "List all review comments on a pull request",
            "user_message": "List review comments for pr 72 in the ArcadeAI/test repo. Sort by updated time in ascending order.",
            "expected_tool_calls": [
                (
                    list_review_comments_on_pull_request,
                    {**pr_args, "sort": "updated", "direction": "asc"},
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.2),
                BinaryCritic(critic_field="repo", weight=0.2),
                BinaryCritic(critic_field="pull_number", weight=0.2),
                BinaryCritic(critic_field="sort", weight=0.2),
                BinaryCritic(critic_field="direction", weight=0.2),
            ],
        },
        # Create Review Comment
        # NOTE(review): the quoted comment text inside this prompt is missing
        # its closing quote; left untouched so the prompt stays unchanged.
        {
            "name": "Create a review comment on a pull request file",
            "user_message": "Create a review comment on pr 72 in the 'ArcadeAI/test' repo. The comment should be on the file 'README.md' and says 'nit: you misspelled the word 'intelligence'",
            "expected_tool_calls": [
                (
                    create_review_comment,
                    {
                        **pr_args,
                        "body": "nit: you misspelled the word 'intelligence'",
                        "path": "README.md",
                        "subject_type": ReviewCommentSubjectType.FILE,
                    },
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.15),
                BinaryCritic(critic_field="repo", weight=0.15),
                BinaryCritic(critic_field="pull_number", weight=0.2),
                SimilarityCritic(critic_field="body", weight=0.1),
                BinaryCritic(critic_field="path", weight=0.2),
                BinaryCritic(critic_field="subject_type", weight=0.2),
            ],
        },
        # Create Review Comment with Line Numbers
        {
            "name": "Create a review comment on specific lines of a pull request",
            "user_message": "Create a review comment on pull request #72 in the 'ArcadeAI/test' repository. The comment should be on the file 'src/main.py', lines 10-15, and say 'Move these to constants.py.'",
            "expected_tool_calls": [
                (
                    create_review_comment,
                    {
                        **pr_args,
                        "body": "Move these to constants.py.",
                        "path": "src/main.py",
                        "start_line": 10,
                        "end_line": 15,
                        "side": DiffSide.RIGHT,
                        "subject_type": ReviewCommentSubjectType.LINE,
                    },
                )
            ],
            "critics": [
                BinaryCritic(critic_field="owner", weight=0.1),
                BinaryCritic(critic_field="repo", weight=0.1),
                BinaryCritic(critic_field="pull_number", weight=0.15),
                SimilarityCritic(critic_field="body", weight=0.15),
                BinaryCritic(critic_field="path", weight=0.1),
                BinaryCritic(critic_field="start_line", weight=0.1),
                BinaryCritic(critic_field="end_line", weight=0.1),
                BinaryCritic(critic_field="side", weight=0.1),
                BinaryCritic(critic_field="subject_type", weight=0.1),
            ],
        },
    ]

    suite = EvalSuite(
        name="GitHub Pull Requests Tools Evaluation Suite",
        system_message="You are an AI assistant that helps users interact with GitHub pull requests using the provided tools.",
        catalog=catalog,
        rubric=rubric,
    )

    # Each spec's keys match add_case's keyword arguments one-to-one.
    for case in cases:
        suite.add_case(**case)

    return suite