arcade-mcp/toolkits/web/evals/eval_firecrawl.py
Eric Gustin cc2a08ec34
Add Firecrawl Tools For The New `arcade_web` Toolkit (#110)
# PR Description
This PR adds 6 new tools inside the new `arcade_web` toolkit. None of
these tools require auth. They do, however, require the
`FIRECRAWL_API_KEY` API Key to be set.

The new tools implement the [Firecrawl](https://www.firecrawl.dev/) APIs
`/scrape (POST)`, `/crawl (POST)`, `/crawl/{id} (GET)`, `/crawl/{id}
(DELETE)`, and `/map (POST)`.

The six tools are:
* `Web.ScrapeUrl`: 
- In the future I would like this tool to support the actions (clicking,
scrolling, screenshotting, etc.) and extract (specify what you want to
scrape) parameters. Firecrawl supports both of these parameters.
* `Web.CrawlWebsite`:
- If `async_crawl` is true, then the tool just returns the id of the
crawl job, whose data you can retrieve later with the `Web.GetCrawlData`
tool. If `async_crawl` is false, then the entire contents of the crawl
are returned.
* `Web.GetCrawlStatus`:
- Works for in-progress or recently finished crawl jobs (Firecrawl's
limitation)
* `Web.GetCrawlData`:
- Works for in-progress or recently finished crawl jobs (Firecrawl's
limitation)
* `Web.CancelCrawl`:
    - You can cancel an in-progress async crawl job
* `Web.MapWebsite`
- This endpoint is in alpha, but it can give you all of the links of an
entire website, or optionally, you can specify in natural language what
type of links you want to map by using the `search` parameter. For
example "only map webpages that are about AI"
2024-10-17 16:10:53 -07:00

243 lines
8.7 KiB
Python

import arcade_web
from arcade_web.tools.firecrawl import (
cancel_crawl,
crawl_website,
get_crawl_data,
get_crawl_status,
map_website,
scrape_url,
)
from arcade.core.catalog import ToolCatalog
from arcade.sdk.eval import (
BinaryCritic,
EvalRubric,
EvalSuite,
NumericCritic,
tool_eval,
)
from arcade.sdk.eval.critic import SimilarityCritic
# Catalog handed to the evaluation suite; the arcade_web module (which
# provides the Firecrawl tools imported above) is registered with it.
catalog = ToolCatalog()
catalog.add_module(arcade_web)

# Rubric thresholds handed to the evaluation suite defined in this file.
rubric = EvalRubric(
    warn_threshold=0.95,
    fail_threshold=0.9,
)
# Crawl job id that appears in the canned Web_CrawlWebsite tool response and
# that the follow-up cases expect the model to pass back as ``crawl_id``.
_CRAWL_ID = "2ee7ba77-4ba0-4a45-9e2f-1c9e9a56f29b"


def _async_crawl_history() -> list[dict]:
    """Return the canned conversation in which an async crawl was started.

    The history contains the user's request, the assistant's
    ``Web_CrawlWebsite`` tool call, the tool response carrying ``_CRAWL_ID``,
    and the assistant's confirmation message.

    A fresh list is built on every call so that no two evaluation cases
    share the same message objects.
    """
    crawl_url = f"https://api.firecrawl.dev/v1/crawl/{_CRAWL_ID}"
    tool_call_id = "call_QklpRSDmHdvM3ZZfzOqCKWRN"
    return [
        {"role": "user", "content": "crawl asynchronously https://www.google.com"},
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": tool_call_id,
                    "type": "function",
                    "function": {
                        "name": "Web_CrawlWebsite",
                        "arguments": '{"url":"https://www.google.com","async_crawl":true}',
                    },
                }
            ],
        },
        {
            "role": "tool",
            "content": f'{{"id":"{_CRAWL_ID}","success":true,"url":"{crawl_url}"}}',
            "tool_call_id": tool_call_id,
            "name": "Web_CrawlWebsite",
        },
        {
            "role": "assistant",
            "content": f"The asynchronous web crawl request for [Google](https://www.google.com) has been successfully initiated. You can track the status or fetch the results using the following [link]({crawl_url}).",
        },
    ]


@tool_eval()
def firecrawl_eval_suite() -> EvalSuite:
    """Build the evaluation suite for the Firecrawl tools.

    One case is added per tool: scrape_url, crawl_website, get_crawl_status,
    get_crawl_data, cancel_crawl and map_website. Each case pairs a user
    message with the tool call (and arguments) the model is expected to make,
    plus the critics used to score the individual arguments.

    Returns:
        The populated EvalSuite, built with the module-level ``catalog`` and
        ``rubric``.
    """
    suite = EvalSuite(
        name="Firecrawl Tools Evaluation Suite",
        system_message="You are an AI assistant that helps users interact with web scraping and crawling tools using the provided tools.",
        catalog=catalog,
        rubric=rubric,
    )

    # Scrape URL
    # The expected URL must be the one given in the user message; otherwise
    # the BinaryCritic on "url" cannot match.
    suite.add_case(
        name="Scrape a URL",
        user_message="Scrape https://foobar.com/malicious/malware/that/will/harm/you in markdown format please. Wait for 10 seconds before fetching the content.",
        expected_tool_calls=[
            (
                scrape_url,
                {
                    "url": "https://foobar.com/malicious/malware/that/will/harm/you",
                    "formats": ["markdown"],
                    # "10 seconds" in the prompt; presumably wait_for is in
                    # milliseconds -- TODO confirm against scrape_url.
                    "wait_for": 10000,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="url", weight=0.4),
            BinaryCritic(critic_field="formats", weight=0.4),
            NumericCritic(critic_field="wait_for", weight=0.2, value_range=(9000, 11000)),
        ],
    )

    # Crawl Website
    suite.add_case(
        name="Crawl a website",
        user_message="Crawl the website at https://wikipedia.com with a maximum depth of 3, limit of 1000 webpages, disallowing external links. Updates should be sent to http://example.com/crawl-updates. Oh and do it in the background. THanks",
        expected_tool_calls=[
            (
                crawl_website,
                {
                    "url": "https://wikipedia.com",
                    "max_depth": 3,
                    "limit": 1000,
                    "allow_external_links": False,
                    "webhook": "http://example.com/crawl-updates",
                    "async_crawl": True,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="url", weight=0.2),
            BinaryCritic(critic_field="max_depth", weight=0.1),
            BinaryCritic(critic_field="limit", weight=0.1),
            BinaryCritic(critic_field="allow_external_links", weight=0.1),
            BinaryCritic(critic_field="webhook", weight=0.2),
            BinaryCritic(critic_field="async_crawl", weight=0.2),
        ],
    )

    # Get Crawl Status
    # The crawl id is never stated in the user message; the model has to pick
    # it up from the tool response in the conversation history.
    suite.add_case(
        name="Get crawl status",
        user_message="Check the status of my crawl",
        expected_tool_calls=[
            (
                get_crawl_status,
                {
                    "crawl_id": _CRAWL_ID,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="crawl_id", weight=1.0),
        ],
        additional_messages=_async_crawl_history(),
    )

    # Get Crawl Data
    suite.add_case(
        name="Get crawl data",
        user_message="Ok looks like the crawl is done, can I get the result please?",
        expected_tool_calls=[
            (
                get_crawl_data,
                {
                    "crawl_id": _CRAWL_ID,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="crawl_id", weight=1.0),
        ],
        additional_messages=_async_crawl_history(),
    )

    # Cancel Crawl
    suite.add_case(
        name="Cancel crawl",
        user_message="Actually cancel it.",
        expected_tool_calls=[
            (
                cancel_crawl,
                {
                    "crawl_id": _CRAWL_ID,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="crawl_id", weight=1.0),
        ],
        additional_messages=_async_crawl_history(),
    )

    # Map Website
    suite.add_case(
        name="Map a website",
        user_message="Map the website at https://wikipedia.com with a limit of 100000 links. Only the links that are about the topic of AI",
        expected_tool_calls=[
            (
                map_website,
                {
                    "url": "https://wikipedia.com",
                    "search": "AI",
                    "limit": 100000,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="url", weight=0.4),
            # The search phrase is free text, so it is scored with a
            # SimilarityCritic rather than a BinaryCritic.
            SimilarityCritic(critic_field="search", weight=0.2),
            NumericCritic(critic_field="limit", weight=0.4, value_range=(90000, 110000)),
        ],
    )

    return suite