This tool will be useful in scenarios akin to RAG, where someone wants to ask questions or request the production of a summary, for instance, about a bunch of documents related to a particular topic. Currently, to fulfill such requests, the LLM needs to first call `list_documents`, then `get_document_by_id` for each document. We also implement utility functions to return documents in Markdown and HTML, since the Drive API JSON is verbose and would waste too many tokens unnecessarily. Limitations: the Markdown/HTML utilities do not handle tables of contents (which I think aren't really useful here), headers, footers, or footnotes. --- This PR deprecates `list_documents` and implements `search_documents` (apart from `search_and_retrieve_documents`). This configuration makes it easier for LLMs to understand when to call each tool. Both tools had their interfaces refactored to remove Google API-specific arguments that were sometimes confusing LLMs, such as "corpora" and "support_all_drives". They now accept arguments that better relate to expected user requests. --------- Co-authored-by: Eric Gustin <eric@arcade.dev>
363 lines
10 KiB
Python
363 lines
10 KiB
Python
from datetime import date, datetime, time, timedelta
|
|
from enum import Enum
|
|
from zoneinfo import ZoneInfo
|
|
|
|
|
|
# ---------------------------------------------------------------------------- #
|
|
# Google Calendar Models and Enums
|
|
# ---------------------------------------------------------------------------- #
|
|
class DateRange(Enum):
|
|
TODAY = "today"
|
|
TOMORROW = "tomorrow"
|
|
THIS_WEEK = "this_week"
|
|
NEXT_WEEK = "next_week"
|
|
THIS_MONTH = "this_month"
|
|
NEXT_MONTH = "next_month"
|
|
|
|
def to_date_range(self) -> tuple[date, date]:
|
|
today = datetime.now().date()
|
|
if self == DateRange.TODAY:
|
|
return today, today + timedelta(days=1)
|
|
elif self == DateRange.TOMORROW:
|
|
return today + timedelta(days=1), today + timedelta(days=2)
|
|
elif self == DateRange.THIS_WEEK:
|
|
start = today - timedelta(days=today.weekday())
|
|
return start, start + timedelta(days=7)
|
|
elif self == DateRange.NEXT_WEEK:
|
|
start = today + timedelta(days=7 - today.weekday())
|
|
return start, start + timedelta(days=7)
|
|
elif self == DateRange.THIS_MONTH:
|
|
start = today.replace(day=1)
|
|
next_month = start + timedelta(days=32)
|
|
end = next_month.replace(day=1)
|
|
return start, end
|
|
elif self == DateRange.NEXT_MONTH:
|
|
start = (today.replace(day=1) + timedelta(days=32)).replace(day=1)
|
|
next_month = start + timedelta(days=32)
|
|
end = next_month.replace(day=1)
|
|
return start, end
|
|
|
|
def to_datetime_range(self, time_zone_name: str | None = None) -> tuple[datetime, datetime]:
|
|
start_date, end_date = self.to_date_range()
|
|
# time_zone = ZoneInfo(time_zone_name)
|
|
start_datetime = datetime.combine(
|
|
start_date, datetime.min.time()
|
|
) # .replace(tzinfo=time_zone)
|
|
end_datetime = datetime.combine(end_date, datetime.min.time()) # .replace(tzinfo=time_zone)
|
|
return start_datetime, end_datetime
|
|
|
|
|
|
class Day(Enum):
    """Relative day names that can be resolved to a concrete calendar date.

    ``THIS_<weekday>`` resolves to the first such weekday on or after today
    (today itself when the weekday matches). ``NEXT_<weekday>`` resolves to
    exactly one week after the corresponding ``THIS_<weekday>``.
    """

    # TODO: There are obvious limitations here. We should do better and support any date.
    YESTERDAY = "yesterday"
    TODAY = "today"
    TOMORROW = "tomorrow"
    THIS_SUNDAY = "this_sunday"
    THIS_MONDAY = "this_monday"
    THIS_TUESDAY = "this_tuesday"
    THIS_WEDNESDAY = "this_wednesday"
    THIS_THURSDAY = "this_thursday"
    THIS_FRIDAY = "this_friday"
    THIS_SATURDAY = "this_saturday"
    NEXT_SUNDAY = "next_sunday"
    NEXT_MONDAY = "next_monday"
    NEXT_TUESDAY = "next_tuesday"
    NEXT_WEDNESDAY = "next_wednesday"
    NEXT_THURSDAY = "next_thursday"
    NEXT_FRIDAY = "next_friday"
    NEXT_SATURDAY = "next_saturday"

    def to_date(self, time_zone_name: str) -> date:
        """Resolve this relative day to a date in the given time zone.

        Args:
            time_zone_name: IANA time zone key (e.g. ``"America/New_York"``)
                used to determine what "today" is.

        Returns:
            The calendar date this member refers to.

        Raises:
            ValueError: If the member is not handled by any branch below.
        """
        time_zone = ZoneInfo(time_zone_name)
        today = datetime.now(time_zone).date()
        weekday = today.weekday()  # Monday == 0 ... Sunday == 6

        if self == Day.YESTERDAY:
            return today - timedelta(days=1)
        elif self == Day.TODAY:
            return today
        elif self == Day.TOMORROW:
            return today + timedelta(days=1)

        # Target weekday index (date.weekday() convention) for each member.
        day_offsets = {
            Day.THIS_SUNDAY: 6,
            Day.THIS_MONDAY: 0,
            Day.THIS_TUESDAY: 1,
            Day.THIS_WEDNESDAY: 2,
            Day.THIS_THURSDAY: 3,
            Day.THIS_FRIDAY: 4,
            Day.THIS_SATURDAY: 5,
        }

        if self in day_offsets:
            # 0..6 days ahead: the first matching weekday on or after today.
            return today + timedelta(days=(day_offsets[self] - weekday) % 7)

        next_week_offsets = {
            Day.NEXT_SUNDAY: 6,
            Day.NEXT_MONDAY: 0,
            Day.NEXT_TUESDAY: 1,
            Day.NEXT_WEDNESDAY: 2,
            Day.NEXT_THURSDAY: 3,
            Day.NEXT_FRIDAY: 4,
            Day.NEXT_SATURDAY: 5,
        }

        if self in next_week_offsets:
            # The extra week must be added AFTER the modulo. Adding 7 inside
            # "% 7" is a no-op and made NEXT_* identical to THIS_*.
            days_until = (next_week_offsets[self] - weekday) % 7
            return today + timedelta(days=days_until + 7)

        raise ValueError(f"Invalid Day enum value: {self}")
|
|
|
|
|
|
class TimeSlot(Enum):
    """Times of day in 15-minute steps, from 00:00 through 23:45.

    Each member's value is the ``"HH:MM"`` string for that slot.
    """

    _0000 = "00:00"
    _0015 = "00:15"
    _0030 = "00:30"
    _0045 = "00:45"
    _0100 = "01:00"
    _0115 = "01:15"
    _0130 = "01:30"
    _0145 = "01:45"
    _0200 = "02:00"
    _0215 = "02:15"
    _0230 = "02:30"
    _0245 = "02:45"
    _0300 = "03:00"
    _0315 = "03:15"
    _0330 = "03:30"
    _0345 = "03:45"
    _0400 = "04:00"
    _0415 = "04:15"
    _0430 = "04:30"
    _0445 = "04:45"
    _0500 = "05:00"
    _0515 = "05:15"
    _0530 = "05:30"
    _0545 = "05:45"
    _0600 = "06:00"
    _0615 = "06:15"
    _0630 = "06:30"
    _0645 = "06:45"
    _0700 = "07:00"
    _0715 = "07:15"
    _0730 = "07:30"
    _0745 = "07:45"
    _0800 = "08:00"
    _0815 = "08:15"
    _0830 = "08:30"
    _0845 = "08:45"
    _0900 = "09:00"
    _0915 = "09:15"
    _0930 = "09:30"
    _0945 = "09:45"
    _1000 = "10:00"
    _1015 = "10:15"
    _1030 = "10:30"
    _1045 = "10:45"
    _1100 = "11:00"
    _1115 = "11:15"
    _1130 = "11:30"
    _1145 = "11:45"
    _1200 = "12:00"
    _1215 = "12:15"
    _1230 = "12:30"
    _1245 = "12:45"
    _1300 = "13:00"
    _1315 = "13:15"
    _1330 = "13:30"
    _1345 = "13:45"
    _1400 = "14:00"
    _1415 = "14:15"
    _1430 = "14:30"
    _1445 = "14:45"
    _1500 = "15:00"
    _1515 = "15:15"
    _1530 = "15:30"
    _1545 = "15:45"
    _1600 = "16:00"
    _1615 = "16:15"
    _1630 = "16:30"
    _1645 = "16:45"
    _1700 = "17:00"
    _1715 = "17:15"
    _1730 = "17:30"
    _1745 = "17:45"
    _1800 = "18:00"
    _1815 = "18:15"
    _1830 = "18:30"
    _1845 = "18:45"
    _1900 = "19:00"
    _1915 = "19:15"
    _1930 = "19:30"
    _1945 = "19:45"
    _2000 = "20:00"
    _2015 = "20:15"
    _2030 = "20:30"
    _2045 = "20:45"
    _2100 = "21:00"
    _2115 = "21:15"
    _2130 = "21:30"
    _2145 = "21:45"
    _2200 = "22:00"
    _2215 = "22:15"
    _2230 = "22:30"
    _2245 = "22:45"
    _2300 = "23:00"
    _2315 = "23:15"
    _2330 = "23:30"
    _2345 = "23:45"

    def to_time(self) -> time:
        """Return this slot as a ``datetime.time`` with zero seconds."""
        hour_text, minute_text = self.value.split(":")
        return time(hour=int(hour_text), minute=int(minute_text))
|
|
|
|
|
|
class EventVisibility(Enum):
    """Visibility settings that can be attached to a calendar event."""

    DEFAULT = "default"
    PUBLIC = "public"
    PRIVATE = "private"
    CONFIDENTIAL = "confidential"
|
|
|
|
|
|
class EventType(Enum):
    """Kinds of calendar event; values are the camelCase API strings."""

    # Special all-day events with an annual recurrence.
    BIRTHDAY = "birthday"
    # Regular events.
    DEFAULT = "default"
    # Focus time events.
    FOCUS_TIME = "focusTime"
    # Events from Gmail.
    FROM_GMAIL = "fromGmail"
    # Out of office events.
    OUT_OF_OFFICE = "outOfOffice"
    # Working location events.
    WORKING_LOCATION = "workingLocation"
|
|
|
|
|
|
class SendUpdatesOptions(Enum):
    """Choices for which guests receive notifications about an event change."""

    # No notifications are sent.
    NONE = "none"
    # Notifications are sent to all guests.
    ALL = "all"
    # Notifications are sent to non-Google Calendar guests only.
    EXTERNAL_ONLY = "externalOnly"
|
|
|
|
|
|
# ---------------------------------------------------------------------------- #
|
|
# Google Drive Models and Enums
|
|
# ---------------------------------------------------------------------------- #
|
|
class Corpora(str, Enum):
    """
    The collection of items (files/documents) that a query is run against.

    'user' and 'drive' are preferred over 'allDrives' for efficiency.
    When nothing is specified, corpora defaults to 'user'.
    """

    USER = "user"
    DOMAIN = "domain"
    DRIVE = "drive"
    ALL_DRIVES = "allDrives"
|
|
|
|
|
|
class OrderBy(str, Enum):
    """
    Keys by which Google Drive files can be sorted.

    Every key comes as a pair: the bare key sorts ascending and the
    ``*_DESC`` member (value suffixed with " desc") sorts descending.
    """

    # When the file was created.
    CREATED_TIME = "createdTime"
    CREATED_TIME_DESC = "createdTime desc"

    # The folder ID, sorted using alphabetical ordering.
    FOLDER = "folder"
    FOLDER_DESC = "folder desc"

    # The last time the file was modified by the user.
    MODIFIED_BY_ME_TIME = "modifiedByMeTime"
    MODIFIED_BY_ME_TIME_DESC = "modifiedByMeTime desc"

    # The last time the file was modified by anyone.
    MODIFIED_TIME = "modifiedTime"
    MODIFIED_TIME_DESC = "modifiedTime desc"

    # The name of the file, sorted using alphabetical ordering (e.g., 1, 12, 2, 22).
    NAME = "name"
    NAME_DESC = "name desc"

    # The name of the file, sorted using natural sort ordering (e.g., 1, 2, 12, 22).
    NAME_NATURAL = "name_natural"
    NAME_NATURAL_DESC = "name_natural desc"

    # The number of storage quota bytes used by the file.
    QUOTA_BYTES_USED = "quotaBytesUsed"
    QUOTA_BYTES_USED_DESC = "quotaBytesUsed desc"

    # The most recent timestamp from the file's date-time fields.
    RECENCY = "recency"
    RECENCY_DESC = "recency desc"

    # When the file was shared with the user, if applicable.
    SHARED_WITH_ME_TIME = "sharedWithMeTime"
    SHARED_WITH_ME_TIME_DESC = "sharedWithMeTime desc"

    # Whether the user has starred the file.
    STARRED = "starred"
    STARRED_DESC = "starred desc"

    # The last time the file was viewed by the user.
    VIEWED_BY_ME_TIME = "viewedByMeTime"
    VIEWED_BY_ME_TIME_DESC = "viewedByMeTime desc"
|
|
|
|
|
|
class DocumentFormat(str, Enum):
    """Formats in which a document's content can be represented."""

    MARKDOWN = "markdown"
    HTML = "html"
    GOOGLE_API_JSON = "google_api_json"
|
|
|
|
|
|
# ---------------------------------------------------------------------------- #
|
|
# Google Gmail Models and Enums
|
|
# ---------------------------------------------------------------------------- #
|
|
class GmailReplyToWhom(str, Enum):
    """Selects the audience of an email reply: every recipient or only the sender."""

    EVERY_RECIPIENT = "every_recipient"
    ONLY_THE_SENDER = "only_the_sender"
|
|
|
|
|
|
class GmailAction(str, Enum):
|
|
SEND = "send"
|
|
DRAFT = "draft"
|