arcade-mcp/toolkits/web/tests/test_firecrawl.py
Eric Gustin cc2a08ec34
Add Firecrawl Tools For The New `arcade_web` Toolkit (#110)
# PR Description
This PR adds 6 new tools inside the new `arcade_web` toolkit. None of
these tools require auth. They do, however, require the
`FIRECRAWL_API_KEY` API Key to be set.

The new tools implement the [Firecrawl](https://www.firecrawl.dev/) APIs
`/scrape (POST)`, `/crawl (POST)`, `/crawl/{id} (GET)`, `/crawl/{id}
(DELETE)`, and `/map (POST)`.

The six tools are:
* `Web.ScrapeUrl`: 
- In the future I would like this tool to support the `actions` (clicking,
scrolling, screenshotting, etc.) and `extract` (specify what you want to
scrape) parameters, both of which Firecrawl already supports.
* `Web.CrawlWebsite`:
- If `async_crawl` is true, then the tool just returns the id of the
crawl job; you can use that id later to retrieve the crawl's data with the
`Web.GetCrawlData` tool. If `async_crawl` is false, then the entire
contents of the crawl are returned.
* `Web.GetCrawlStatus`
- Works for in-progress or recently finished crawl jobs (Firecrawl's
limitation).
* `Web.GetCrawlData`
- Works for in-progress or recently finished crawl jobs (Firecrawl's
limitation).
* `Web.CancelCrawl`
    - You can cancel an in-progress async crawl job.
* `Web.MapWebsite`
- This endpoint is in alpha, but it can give you all of the links of an
entire website, or optionally, you can specify in natural language what
type of links you want to map by using the `search` parameter. For
example: "only map webpages that are about AI".
2024-10-17 16:10:53 -07:00

97 lines
3 KiB
Python

from unittest.mock import AsyncMock, patch
import pytest
from arcade_web.tools.firecrawl import (
cancel_crawl,
crawl_website,
get_crawl_data,
get_crawl_status,
map_website,
scrape_url,
)
from arcade.sdk.error import ToolExecutionError
@pytest.fixture
def mock_context():
    """Provide an ``AsyncMock`` context whose ``authorization.token`` is a placeholder string."""
    ctx = AsyncMock()
    ctx.authorization.token = "mock_token"  # noqa: S105
    return ctx
@pytest.fixture
def mock_firecrawl_app():
    """Patch ``FirecrawlApp`` in the firecrawl tools module and yield the mocked instance.

    The yielded object is the patched class's ``return_value``, i.e. what any
    ``FirecrawlApp(...)`` call made while the patch is active evaluates to.
    The patch is removed when the fixture is torn down.
    """
    with patch("arcade_web.tools.firecrawl.FirecrawlApp") as firecrawl_cls:
        firecrawl_instance = firecrawl_cls.return_value
        yield firecrawl_instance
@pytest.mark.asyncio
async def test_scrape_url_success(mock_firecrawl_app):
    """Expect ``scrape_url`` to return the payload produced by the mocked client's ``scrape_url``."""
    expected = {"data": "scraped content"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.scrape_url.return_value = dict(expected)

    scraped = await scrape_url("http://example.com")

    assert scraped == expected
@pytest.mark.asyncio
async def test_crawl_website_success(mock_firecrawl_app):
    """Expect ``crawl_website``, called with only a URL, to return the mocked ``async_crawl_url`` payload."""
    expected = {"crawl_id": "12345"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.async_crawl_url.return_value = dict(expected)

    crawl_response = await crawl_website("http://example.com")

    assert crawl_response == expected
@pytest.mark.asyncio
async def test_get_crawl_status_success(mock_firecrawl_app):
    """Expect ``get_crawl_status`` to return the payload of the mocked ``check_crawl_status``."""
    expected = {"status": "completed"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.check_crawl_status.return_value = dict(expected)

    status_response = await get_crawl_status("12345")

    assert status_response == expected
@pytest.mark.asyncio
async def test_get_crawl_data_success(mock_firecrawl_app):
    """Expect ``get_crawl_data`` to return the payload of the mocked ``check_crawl_status``."""
    expected = {"data": "crawl data"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.check_crawl_status.return_value = dict(expected)

    data_response = await get_crawl_data("12345")

    assert data_response == expected
@pytest.mark.asyncio
async def test_cancel_crawl_success(mock_firecrawl_app):
    """Expect ``cancel_crawl`` to return the payload of the mocked client's ``cancel_crawl``."""
    expected = {"status": "cancelled"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.cancel_crawl.return_value = dict(expected)

    cancel_response = await cancel_crawl("12345")

    assert cancel_response == expected
@pytest.mark.asyncio
async def test_map_website_success(mock_firecrawl_app):
    """Expect ``map_website`` to return the payload of the mocked client's ``map_url``."""
    expected = {"map": "website map"}
    # Hand the mock its own copy so the assertion compares against an untouched dict.
    mock_firecrawl_app.map_url.return_value = dict(expected)

    map_response = await map_website("http://example.com")

    assert map_response == expected
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "method,params,error_message",
    [
        (scrape_url, ("http://example.com",), "Error scraping URL"),
        (crawl_website, ("http://example.com",), "Error crawling website"),
        (get_crawl_status, ("12345",), "Error getting crawl status"),
        (get_crawl_data, ("12345",), "Error getting crawl data"),
        (cancel_crawl, ("12345",), "Error cancelling crawl"),
        (map_website, ("http://example.com",), "Error mapping website"),
    ],
)
async def test_firecrawl_error(mock_firecrawl_app, method, params, error_message):
    """Expect each tool to raise ``ToolExecutionError`` when the mocked client raises.

    Each parametrized case supplies the tool to call, its positional arguments,
    and the message carried by the ``Exception`` the mocked client raises.
    Only the exception type is asserted; the message is not checked.
    """
    # Prime every client method used in this module's tests to fail, so the
    # case does not need to know which one the tool under test calls.
    failing_client_methods = (
        "scrape_url",
        "async_crawl_url",
        "check_crawl_status",
        "cancel_crawl",
        "map_url",
    )
    for client_method in failing_client_methods:
        getattr(mock_firecrawl_app, client_method).side_effect = Exception(error_message)

    with pytest.raises(ToolExecutionError):
        await method(*params)