arcade-mcp/toolkits/google/arcade_google/models.py
Renato Byrro ac0f5aa10c
Search Google Drive documents and retrieve contents (#265)
This tool will be useful in scenarios akin to RAG, where someone wants
to ask questions or request the production of a summary, for instance,
about a bunch of documents related to a particular topic. Currently, to
fulfill such requests, the LLM needs to first `list_documents`, then
`get_document_by_id` for each document.

We also implement utility functions to return documents in Markdown
and HTML, since the Drive API JSON is verbose and would waste too many
tokens unnecessarily.

Limitations: the Markdown/HTML utilities do not handle tables of contents
(which I think aren't really useful here), headers, footers, or
footnotes.

---
This PR deprecates `list_documents` and implements `search_documents`
(in addition to `search_and_retrieve_documents`). This configuration makes it
easier for LLMs to understand when to call each tool.

Both tools had their interfaces refactored to remove Google API-specific
arguments that sometimes confused LLMs, such as "corpora" and
"support_all_drives". They now accept arguments that better relate to
expected user requests.

---------

Co-authored-by: Eric Gustin <eric@arcade.dev>
2025-03-07 18:42:12 -03:00

363 lines
10 KiB
Python

from datetime import date, datetime, time, timedelta
from enum import Enum
from zoneinfo import ZoneInfo
# ---------------------------------------------------------------------------- #
# Google Calendar Models and Enums
# ---------------------------------------------------------------------------- #
class DateRange(Enum):
TODAY = "today"
TOMORROW = "tomorrow"
THIS_WEEK = "this_week"
NEXT_WEEK = "next_week"
THIS_MONTH = "this_month"
NEXT_MONTH = "next_month"
def to_date_range(self) -> tuple[date, date]:
today = datetime.now().date()
if self == DateRange.TODAY:
return today, today + timedelta(days=1)
elif self == DateRange.TOMORROW:
return today + timedelta(days=1), today + timedelta(days=2)
elif self == DateRange.THIS_WEEK:
start = today - timedelta(days=today.weekday())
return start, start + timedelta(days=7)
elif self == DateRange.NEXT_WEEK:
start = today + timedelta(days=7 - today.weekday())
return start, start + timedelta(days=7)
elif self == DateRange.THIS_MONTH:
start = today.replace(day=1)
next_month = start + timedelta(days=32)
end = next_month.replace(day=1)
return start, end
elif self == DateRange.NEXT_MONTH:
start = (today.replace(day=1) + timedelta(days=32)).replace(day=1)
next_month = start + timedelta(days=32)
end = next_month.replace(day=1)
return start, end
def to_datetime_range(self, time_zone_name: str | None = None) -> tuple[datetime, datetime]:
start_date, end_date = self.to_date_range()
# time_zone = ZoneInfo(time_zone_name)
start_datetime = datetime.combine(
start_date, datetime.min.time()
) # .replace(tzinfo=time_zone)
end_datetime = datetime.combine(end_date, datetime.min.time()) # .replace(tzinfo=time_zone)
return start_datetime, end_datetime
class Day(Enum):
    """Named days expressed relative to the current date.

    ``THIS_<weekday>`` is the first occurrence of that weekday on or after
    today (today itself counts). ``NEXT_<weekday>`` is exactly one week after
    the matching ``THIS_<weekday>``.
    """

    # TODO: There are obvious limitations here. We should do better and support any date.
    YESTERDAY = "yesterday"
    TODAY = "today"
    TOMORROW = "tomorrow"
    THIS_SUNDAY = "this_sunday"
    THIS_MONDAY = "this_monday"
    THIS_TUESDAY = "this_tuesday"
    THIS_WEDNESDAY = "this_wednesday"
    THIS_THURSDAY = "this_thursday"
    THIS_FRIDAY = "this_friday"
    THIS_SATURDAY = "this_saturday"
    NEXT_SUNDAY = "next_sunday"
    NEXT_MONDAY = "next_monday"
    NEXT_TUESDAY = "next_tuesday"
    NEXT_WEDNESDAY = "next_wednesday"
    NEXT_THURSDAY = "next_thursday"
    NEXT_FRIDAY = "next_friday"
    NEXT_SATURDAY = "next_saturday"

    def to_date(self, time_zone_name: str) -> date:
        """Resolve this member to a concrete date.

        Args:
            time_zone_name: IANA time zone name used to decide which calendar
                day counts as "today".

        Returns:
            The date this member refers to, relative to today in the given
            time zone.

        Raises:
            ValueError: If the member is not handled.
        """
        time_zone = ZoneInfo(time_zone_name)
        today = datetime.now(time_zone).date()
        weekday = today.weekday()  # Monday == 0 ... Sunday == 6

        if self == Day.YESTERDAY:
            return today - timedelta(days=1)
        if self == Day.TODAY:
            return today
        if self == Day.TOMORROW:
            return today + timedelta(days=1)

        # Target weekday numbers, using the same numbering as date.weekday().
        this_week_targets = {
            Day.THIS_SUNDAY: 6,
            Day.THIS_MONDAY: 0,
            Day.THIS_TUESDAY: 1,
            Day.THIS_WEDNESDAY: 2,
            Day.THIS_THURSDAY: 3,
            Day.THIS_FRIDAY: 4,
            Day.THIS_SATURDAY: 5,
        }
        if self in this_week_targets:
            days_ahead = (this_week_targets[self] - weekday) % 7
            return today + timedelta(days=days_ahead)

        next_week_targets = {
            Day.NEXT_SUNDAY: 6,
            Day.NEXT_MONDAY: 0,
            Day.NEXT_TUESDAY: 1,
            Day.NEXT_WEDNESDAY: 2,
            Day.NEXT_THURSDAY: 3,
            Day.NEXT_FRIDAY: 4,
            Day.NEXT_SATURDAY: 5,
        }
        if self in next_week_targets:
            # The extra week must be added AFTER the modulo; adding 7 inside
            # it is a no-op and made NEXT_* identical to THIS_*.
            days_ahead = (next_week_targets[self] - weekday) % 7 + 7
            return today + timedelta(days=days_ahead)

        raise ValueError(f"Invalid Day enum value: {self}")
class TimeSlot(Enum):
    """Times of day in 15-minute steps, from 00:00 through 23:45.

    Each member's value is the 24-hour ``HH:MM`` string; the member name is
    the same digits prefixed with an underscore.
    """

    _0000 = "00:00"
    _0015 = "00:15"
    _0030 = "00:30"
    _0045 = "00:45"
    _0100 = "01:00"
    _0115 = "01:15"
    _0130 = "01:30"
    _0145 = "01:45"
    _0200 = "02:00"
    _0215 = "02:15"
    _0230 = "02:30"
    _0245 = "02:45"
    _0300 = "03:00"
    _0315 = "03:15"
    _0330 = "03:30"
    _0345 = "03:45"
    _0400 = "04:00"
    _0415 = "04:15"
    _0430 = "04:30"
    _0445 = "04:45"
    _0500 = "05:00"
    _0515 = "05:15"
    _0530 = "05:30"
    _0545 = "05:45"
    _0600 = "06:00"
    _0615 = "06:15"
    _0630 = "06:30"
    _0645 = "06:45"
    _0700 = "07:00"
    _0715 = "07:15"
    _0730 = "07:30"
    _0745 = "07:45"
    _0800 = "08:00"
    _0815 = "08:15"
    _0830 = "08:30"
    _0845 = "08:45"
    _0900 = "09:00"
    _0915 = "09:15"
    _0930 = "09:30"
    _0945 = "09:45"
    _1000 = "10:00"
    _1015 = "10:15"
    _1030 = "10:30"
    _1045 = "10:45"
    _1100 = "11:00"
    _1115 = "11:15"
    _1130 = "11:30"
    _1145 = "11:45"
    _1200 = "12:00"
    _1215 = "12:15"
    _1230 = "12:30"
    _1245 = "12:45"
    _1300 = "13:00"
    _1315 = "13:15"
    _1330 = "13:30"
    _1345 = "13:45"
    _1400 = "14:00"
    _1415 = "14:15"
    _1430 = "14:30"
    _1445 = "14:45"
    _1500 = "15:00"
    _1515 = "15:15"
    _1530 = "15:30"
    _1545 = "15:45"
    _1600 = "16:00"
    _1615 = "16:15"
    _1630 = "16:30"
    _1645 = "16:45"
    _1700 = "17:00"
    _1715 = "17:15"
    _1730 = "17:30"
    _1745 = "17:45"
    _1800 = "18:00"
    _1815 = "18:15"
    _1830 = "18:30"
    _1845 = "18:45"
    _1900 = "19:00"
    _1915 = "19:15"
    _1930 = "19:30"
    _1945 = "19:45"
    _2000 = "20:00"
    _2015 = "20:15"
    _2030 = "20:30"
    _2045 = "20:45"
    _2100 = "21:00"
    _2115 = "21:15"
    _2130 = "21:30"
    _2145 = "21:45"
    _2200 = "22:00"
    _2215 = "22:15"
    _2230 = "22:30"
    _2245 = "22:45"
    _2300 = "23:00"
    _2315 = "23:15"
    _2330 = "23:30"
    _2345 = "23:45"

    def to_time(self) -> time:
        """Return this slot as a naive ``datetime.time`` (seconds are zero)."""
        slot_text = self.value
        return time.fromisoformat(slot_text)
class EventVisibility(Enum):
    """Visibility options for a calendar event."""

    DEFAULT = "default"
    PUBLIC = "public"
    PRIVATE = "private"
    CONFIDENTIAL = "confidential"
class EventType(Enum):
    """Kinds of calendar event."""

    # Special all-day events with an annual recurrence.
    BIRTHDAY = "birthday"
    # Regular events
    DEFAULT = "default"
    # Focus time events
    FOCUS_TIME = "focusTime"
    # Events from Gmail
    FROM_GMAIL = "fromGmail"
    # Out of office events
    OUT_OF_OFFICE = "outOfOffice"
    # Working location events
    WORKING_LOCATION = "workingLocation"
class SendUpdatesOptions(Enum):
    """Choices for which guests are sent notifications."""

    # No notifications are sent
    NONE = "none"
    # Notifications are sent to all guests
    ALL = "all"
    # Notifications are sent to non-Google Calendar guests only.
    EXTERNAL_ONLY = "externalOnly"
# ---------------------------------------------------------------------------- #
# Google Drive Models and Enums
# ---------------------------------------------------------------------------- #
class Corpora(str, Enum):
    """Bodies of items (files/documents) that a query is applied to.

    For efficiency, prefer 'user' or 'drive' over 'allDrives'. The default
    corpora is 'user'.
    """

    USER = "user"
    DOMAIN = "domain"
    DRIVE = "drive"
    ALL_DRIVES = "allDrives"
class OrderBy(str, Enum):
    """Sort keys for ordering files in Google Drive.

    Every key comes as a pair: the plain member sorts ascending and the
    ``*_DESC`` member sorts descending.
    """

    # When the file was created
    CREATED_TIME = "createdTime"
    CREATED_TIME_DESC = "createdTime desc"

    # The folder ID, sorted using alphabetical ordering
    FOLDER = "folder"
    FOLDER_DESC = "folder desc"

    # The last time the file was modified by the user
    MODIFIED_BY_ME_TIME = "modifiedByMeTime"
    MODIFIED_BY_ME_TIME_DESC = "modifiedByMeTime desc"

    # The last time the file was modified by anyone
    MODIFIED_TIME = "modifiedTime"
    MODIFIED_TIME_DESC = "modifiedTime desc"

    # The name of the file, sorted using alphabetical ordering (e.g., 1, 12, 2, 22)
    NAME = "name"
    NAME_DESC = "name desc"

    # The name of the file, sorted using natural sort ordering (e.g., 1, 2, 12, 22)
    NAME_NATURAL = "name_natural"
    NAME_NATURAL_DESC = "name_natural desc"

    # The number of storage quota bytes used by the file
    QUOTA_BYTES_USED = "quotaBytesUsed"
    QUOTA_BYTES_USED_DESC = "quotaBytesUsed desc"

    # The most recent timestamp from the file's date-time fields
    RECENCY = "recency"
    RECENCY_DESC = "recency desc"

    # When the file was shared with the user, if applicable
    SHARED_WITH_ME_TIME = "sharedWithMeTime"
    SHARED_WITH_ME_TIME_DESC = "sharedWithMeTime desc"

    # Whether the user has starred the file
    STARRED = "starred"
    STARRED_DESC = "starred desc"

    # The last time the file was viewed by the user
    VIEWED_BY_ME_TIME = "viewedByMeTime"
    VIEWED_BY_ME_TIME_DESC = "viewedByMeTime desc"
class DocumentFormat(str, Enum):
    """Document format options: Markdown, HTML, or Google API JSON."""

    MARKDOWN = "markdown"
    HTML = "html"
    GOOGLE_API_JSON = "google_api_json"
# ---------------------------------------------------------------------------- #
# Google Gmail Models and Enums
# ---------------------------------------------------------------------------- #
class GmailReplyToWhom(str, Enum):
    """Choice of who a Gmail reply is addressed to."""

    EVERY_RECIPIENT = "every_recipient"
    ONLY_THE_SENDER = "only_the_sender"
class GmailAction(str, Enum):
    """Gmail action choice: ``send`` or ``draft``."""

    SEND = "send"
    DRAFT = "draft"