# PR Description ## Split toolkits This PR splits the `Microsoft`, `Google`, and `Search` toolkits into multiple toolkits each. * `Microsoft` --> `OutlookCalendar`, `OutlookMail`. * `Google` -----> `GoogleCalendar`, `GoogleContacts`, `GoogleDocs`, `GoogleDrive`, `Gmail`, `GoogleSheets` * `Search` -----> `GoogleFinance`, `GoogleFlights`, `GoogleHotels`, `GoogleJobs`, `GoogleMaps`, `GoogleNews`, `GoogleSearch`, `GoogleShopping`, `Walmart`, `Youtube` > The original monolithic toolkits (`Microsoft`, `Google`, `Search`) are not removed in this PR. The plan is to keep those toolkits around while we > 1. Stop documenting the toolkits, > 2. Stop displaying the toolkits in the dashboard, and > 3. Help customers migrate over to the new split toolkits. ## Rename toolkits This PR renames the following toolkits * `Web` ------------> `Firecrawl` * `CodeSandbox` ---> `E2B` > The `Web` and `CodeSandbox` toolkits are not removed in this PR. The plan is to keep them around while we > 1. Stop documenting the toolkits, > 2. Stop displaying the toolkits in the dashboard, and > 3. Help customers migrate over to the new renamed toolkits. ## Rename tools Since toolkit names were changed, this called for some tools to be renamed as well. * `GoogleSearch.SearchGoogle` ----------------> `GoogleSearch.Search` * `GoogleShopping.SearchShoppingProducts` ---> `GoogleShopping.SearchProducts` * `Walmart.SearchWalmartProducts` ------------> `Walmart.SearchProducts` * `Walmart.GetWalmartProductDetails` ---------> `Walmart.GetProductDetails` * `Youtube.SearchYoutubeVideos` --------------> `Youtube.SearchForVideos` ## Google File Picker Improvements to the Google File Picker experience were also added in this PR. The following tools will ALWAYS provide llm_instructions in their response to "let the end-user know that they have the option to select more files via the file picker url if they want to": * `GoogleDocs.SearchDocuments` * `GoogleDocs.SearchAndRetrieveDocuments` * `GoogleDrive.GetFileTreeStructure` The following tools will only provide the file picker URL if a 404 or 403 from the Google API: * `GoogleDocs.InsertTextAtEndOfDocument` * `GoogleDocs.GetDocumentById` * `GoogleSheets.GetSpreadsheet` * `GoogleSheets.WriteToCell` Also, a standalone `GoogleDrive.GenerateGoogleFilePickerUrl` tool exists. ## Other * The `SearchDocuments` and `SearchAndRetrieveDocuments` tools used to be organized within the Drive portion of the Google toolkit, but I moved these into the new GoogleDocs toolkit because they are specific to Docs. # Progress - [x] `OutlookCalendar` - [x] `OutlookMail` - [x] `GoogleFinance` - [x] `GoogleFlights` - [x] `GoogleHotels` - [x] `GoogleJobs` - [x] `GoogleMaps` - [x] `GoogleNews` - [x] `GoogleSearch` - [x] `GoogleShopping` - [x] `Walmart` - [x] `Youtube` - [x] `GoogleCalendar` - [x] `GoogleContacts` - [x] `GoogleDocs` - [x] `GoogleDrive` - [x] `Gmail` - [x] `GoogleSheets` - [x] `Firecrawl` - [x] `E2B` - [x] File picker # Discussion * Repeated code is a consequence of splitting toolkits that use the same provider. I am open to any ideas that would allow multiple toolkits to reference common code. Comment your ideas in this PR.
119 lines
3.6 KiB
Python
119 lines
3.6 KiB
Python
import logging
|
|
from typing import Any
|
|
|
|
from google.oauth2.credentials import Credentials
|
|
from googleapiclient.discovery import Resource, build
|
|
|
|
from arcade_google_docs.enum import Corpora, OrderBy
|
|
|
|
## Set up basic configuration for logging to the console with DEBUG level and a specific format.
|
|
logging.basicConfig(
|
|
level=logging.DEBUG,
|
|
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def build_docs_service(auth_token: str | None) -> Resource: # type: ignore[no-any-unimported]
|
|
"""
|
|
Build a Drive service object.
|
|
"""
|
|
auth_token = auth_token or ""
|
|
return build("docs", "v1", credentials=Credentials(auth_token))
|
|
|
|
|
|
def build_drive_service(auth_token: str | None) -> Resource: # type: ignore[no-any-unimported]
|
|
"""
|
|
Build a Drive service object.
|
|
"""
|
|
auth_token = auth_token or ""
|
|
return build("drive", "v3", credentials=Credentials(auth_token))
|
|
|
|
|
|
def build_files_list_params(
|
|
mime_type: str,
|
|
page_size: int,
|
|
order_by: list[OrderBy],
|
|
pagination_token: str | None,
|
|
include_shared_drives: bool,
|
|
search_only_in_shared_drive_id: str | None,
|
|
include_organization_domain_documents: bool,
|
|
document_contains: list[str] | None = None,
|
|
document_not_contains: list[str] | None = None,
|
|
) -> dict[str, Any]:
|
|
query = build_files_list_query(
|
|
mime_type=mime_type,
|
|
document_contains=document_contains,
|
|
document_not_contains=document_not_contains,
|
|
)
|
|
|
|
params = {
|
|
"q": query,
|
|
"pageSize": page_size,
|
|
"orderBy": ",".join([item.value for item in order_by]),
|
|
"pageToken": pagination_token,
|
|
}
|
|
|
|
if (
|
|
include_shared_drives
|
|
or search_only_in_shared_drive_id
|
|
or include_organization_domain_documents
|
|
):
|
|
params["includeItemsFromAllDrives"] = "true"
|
|
params["supportsAllDrives"] = "true"
|
|
|
|
if search_only_in_shared_drive_id:
|
|
params["driveId"] = search_only_in_shared_drive_id
|
|
params["corpora"] = Corpora.DRIVE.value
|
|
|
|
if include_organization_domain_documents:
|
|
params["corpora"] = Corpora.DOMAIN.value
|
|
|
|
params = remove_none_values(params)
|
|
|
|
return params
|
|
|
|
|
|
def build_files_list_query(
|
|
mime_type: str,
|
|
document_contains: list[str] | None = None,
|
|
document_not_contains: list[str] | None = None,
|
|
) -> str:
|
|
query = [f"(mimeType = '{mime_type}' and trashed = false)"]
|
|
|
|
if isinstance(document_contains, str):
|
|
document_contains = [document_contains]
|
|
|
|
if isinstance(document_not_contains, str):
|
|
document_not_contains = [document_not_contains]
|
|
|
|
if document_contains:
|
|
for keyword in document_contains:
|
|
name_contains = keyword.replace("'", "\\'")
|
|
full_text_contains = keyword.replace("'", "\\'")
|
|
keyword_query = (
|
|
f"(name contains '{name_contains}' or fullText contains '{full_text_contains}')"
|
|
)
|
|
query.append(keyword_query)
|
|
|
|
if document_not_contains:
|
|
for keyword in document_not_contains:
|
|
name_not_contains = keyword.replace("'", "\\'")
|
|
full_text_not_contains = keyword.replace("'", "\\'")
|
|
keyword_query = (
|
|
f"(name not contains '{name_not_contains}' and "
|
|
f"fullText not contains '{full_text_not_contains}')"
|
|
)
|
|
query.append(keyword_query)
|
|
|
|
return " and ".join(query)
|
|
|
|
|
|
def remove_none_values(params: dict) -> dict:
|
|
"""
|
|
Remove None values from a dictionary.
|
|
:param params: The dictionary to clean
|
|
:return: A new dictionary with None values removed
|
|
"""
|
|
return {k: v for k, v in params.items() if v is not None}
|