Tool to retrieve file tree structure from Google 'My Drive' and 'Shared Drives' (#269)

This commit is contained in:
Renato Byrro 2025-03-07 18:02:09 -03:00 committed by GitHub
parent 4608cce862
commit 2135101acd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 700 additions and 4 deletions

View file

@ -2,8 +2,14 @@ from typing import Annotated, Any, Optional
from arcade.sdk import ToolContext, tool
from arcade.sdk.auth import Google
from googleapiclient.errors import HttpError
from arcade_google.tools.utils import build_drive_service, remove_none_values
from arcade_google.tools.utils import (
build_drive_service,
build_file_tree,
build_file_tree_request_params,
remove_none_values,
)
from .models import Corpora, OrderBy
@ -82,3 +88,96 @@ async def list_documents(
break
return {"documents_count": len(files), "documents": files}
@tool(
    requires_auth=Google(
        scopes=["https://www.googleapis.com/auth/drive.file"],
    )
)
async def get_file_tree_structure(
    context: ToolContext,
    include_shared_drives: Annotated[
        bool, "Whether to include shared drives in the file tree structure. Defaults to False."
    ] = False,
    restrict_to_shared_drive_id: Annotated[
        Optional[str],
        "If provided, only include files from this shared drive in the file tree structure. "
        "Defaults to None, which will include files and folders from all drives.",
    ] = None,
    include_organization_domain_documents: Annotated[
        bool,
        "Whether to include documents from the organization's domain. This is applicable to admin "
        "users who have permissions to view organization-wide documents in a Google Workspace "
        "account. Defaults to False.",
    ] = False,
    order_by: Annotated[
        Optional[list[OrderBy]],
        "Sort order. Defaults to listing the most recently modified documents first",
    ] = None,
    limit: Annotated[
        Optional[int],
        "The number of files and folders to list. Defaults to None, "
        "which will list all files and folders.",
    ] = None,
) -> Annotated[
    dict,
    "A dictionary containing the file/folder tree structure in the user's Google Drive",
]:
    """
    Get the file/folder tree structure of the user's Google Drive.
    """
    # Implementation notes (kept out of the docstring above so its text stays unchanged):
    #   1. List files page by page through the Drive `files.list` endpoint.
    #   2. Stop as soon as `limit` entries were collected (when a positive limit is given).
    #   3. Nest the flat listing with `build_file_tree` and resolve each shared drive's name.
    # Returns {"drives": [...]}; each drive has "name" and "children", and shared drives
    # additionally carry their "id".
    service = build_drive_service(
        context.authorization.token if context.authorization and context.authorization.token else ""
    )

    params = build_file_tree_request_params(
        order_by,
        None,  # no continuation token for the first page
        limit,
        include_shared_drives,
        restrict_to_shared_drive_id,
        include_organization_domain_documents,
    )

    # Drive only returns the fields named in the `fields` mask. Without `nextPageToken`
    # in the mask the token is never present in the response and the loop below would
    # silently stop after the first page.
    requested_fields = params.get("fields") or ""
    if "nextPageToken" not in requested_fields:
        params["fields"] = ", ".join(filter(None, ["nextPageToken", requested_fields]))

    # Only a positive limit caps the result; None/0 keep the "list everything" behaviour.
    max_items = limit if limit and limit > 0 else None

    files: dict[str, Any] = {}
    while True:
        results = service.files().list(**params).execute()

        for file in results.get("files", []):
            if max_items is not None and len(files) >= max_items:
                break
            files[file["id"]] = file

        page_token = results.get("nextPageToken")
        limit_reached = max_items is not None and len(files) >= max_items
        if page_token is None or limit_reached:
            break
        params["pageToken"] = page_token

    if not files:
        return {"drives": []}

    drives = []
    for drive_id, children in build_file_tree(files).items():
        if drive_id == "My Drive":
            drives.append({"name": "My Drive", "children": children})
            continue

        # Shared drives are keyed by id; look up a human-readable name and fall back to a
        # descriptive placeholder when the lookup fails, so one bad drive does not abort
        # the whole listing.
        try:
            drive_details = service.drives().get(driveId=drive_id).execute()
            drive_name = drive_details.get("name", "Shared Drive (name unavailable)")
        except HttpError as e:
            drive_name = (
                f"Shared Drive (name unavailable: 'HttpError {e.status_code}: {e.reason}')"
            )
        drives.append({"name": drive_name, "id": drive_id, "children": children})

    return {"drives": drives}

View file

@ -15,7 +15,14 @@ from googleapiclient.discovery import Resource, build
from arcade_google.tools.constants import DEFAULT_SEARCH_CONTACTS_LIMIT
from arcade_google.tools.exceptions import GmailToolError, GoogleServiceError
from arcade_google.tools.models import Day, GmailAction, GmailReplyToWhom, TimeSlot
from arcade_google.tools.models import (
Corpora,
Day,
GmailAction,
GmailReplyToWhom,
OrderBy,
TimeSlot,
)
## Set up basic configuration for logging to the console with DEBUG level and a specific format.
logging.basicConfig(
@ -592,6 +599,97 @@ def build_drive_service(auth_token: Optional[str]) -> Resource: # type: ignore[
return build("drive", "v3", credentials=Credentials(auth_token))
def build_file_tree_request_params(
    order_by: Optional[list[OrderBy]],
    page_token: Optional[str],
    limit: Optional[int],
    include_shared_drives: bool,
    restrict_to_shared_drive_id: Optional[str],
    include_organization_domain_documents: bool,
) -> dict[str, Any]:
    """Assemble the keyword arguments for a Drive `files().list()` call.

    Args:
        order_by: Sort keys; `None` means most recently modified first. A single
            `OrderBy` member is accepted and wrapped in a list.
        page_token: Continuation token to send as `pageToken` (may be `None`).
        limit: When truthy, sent as `pageSize` (stringified).
        include_shared_drives: Enables the all-drives flags.
        restrict_to_shared_drive_id: When truthy, sets `driveId`, switches the corpora
            to `Corpora.DRIVE` and enables the all-drives flags.
        include_organization_domain_documents: When true, switches the corpora to
            `Corpora.DOMAIN` (taking precedence over the drive corpora) and enables
            the all-drives flags.

    Returns:
        A dict of request parameters. Trashed files are always excluded.
    """
    # NOTE(review): the `fields` mask below does not name `nextPageToken`; a caller that
    # paginates on the response token needs to confirm it is requested.
    if isinstance(order_by, OrderBy):
        sort_keys = [order_by]
    elif order_by is None:
        sort_keys = [OrderBy.MODIFIED_TIME_DESC]
    else:
        sort_keys = order_by

    # Domain wins over a specific drive, which wins over the user's own corpus.
    if include_organization_domain_documents:
        corpora = Corpora.DOMAIN
    elif restrict_to_shared_drive_id:
        corpora = Corpora.DRIVE
    else:
        corpora = Corpora.USER

    request: dict[str, Any] = {
        "q": "trashed = false",
        "corpora": corpora.value,
        "pageToken": page_token,
        "fields": (
            "files(id, name, parents, mimeType, driveId, size, createdTime, modifiedTime, owners)"
        ),
        "orderBy": ",".join(key.value for key in sort_keys),
    }

    if limit:
        request["pageSize"] = str(limit)

    needs_all_drives = bool(
        include_shared_drives
        or restrict_to_shared_drive_id
        or include_organization_domain_documents
    )
    if needs_all_drives:
        request.update(includeItemsFromAllDrives="true", supportsAllDrives="true")

    if restrict_to_shared_drive_id:
        request["driveId"] = restrict_to_shared_drive_id

    return request
def build_file_tree(files: dict[str, Any]) -> dict[str, Any]:
    """Nest a flat mapping of Drive file resources into per-drive trees.

    The file dicts are modified in place: `owners` is reduced to name/email pairs,
    `size` becomes `{"value": <int>, "unit": "bytes"}`, `parents` and `driveId` are
    removed, and folders gain a `children` list holding their direct descendants.

    Args:
        files: File resources keyed by file id.

    Returns:
        A dict keyed by drive id (or the literal "My Drive" for files without a
        `driveId`) whose values are the lists of top-level entries of that drive.

    Raises:
        ValueError: If a `size` value cannot be converted to an integer.
    """
    file_tree: dict[str, Any] = {}

    for file in files.values():
        owners = file.get("owners")
        if owners:
            file["owners"] = [
                {"name": owner.get("displayName", ""), "email": owner.get("emailAddress", "")}
                for owner in owners
            ]

        if "size" in file:
            file["size"] = {"value": int(file["size"]), "unit": "bytes"}

        # Although "parents" is a list, a file can only have one parent. Popping the key
        # strips it from the output even when the list is empty, so every entry has the
        # same shape.
        parents = file.pop("parents", None)
        parent_id = parents[0] if parents else None

        # Files without a shared drive id live in "My Drive".
        drive_id = file.pop("driveId", "My Drive")
        drive_root = file_tree.setdefault(drive_id, [])

        # Root files have the drive's own id as parent, which never appears in the
        # listing; any file whose parent is not listed is therefore placed at the root.
        if parent_id in files:
            files[parent_id].setdefault("children", []).append(file)
        else:
            drive_root.append(file)

    return file_tree
# Docs utils
def build_docs_service(auth_token: Optional[str]) -> Resource: # type: ignore[no-any-unimported]
"""

197
toolkits/google/conftest.py Normal file
View file

@ -0,0 +1,197 @@
import pytest
@pytest.fixture
def sample_drive_file_tree_request_responses() -> tuple[dict, list]:
    """Provide canned Drive API payloads for the file-tree tests.

    Returns:
        A `(files_list, drives_get)` tuple: a `files.list`-shaped response containing
        three entries that carry a `driveId` and eight that do not, followed by the
        list of `drives.get`-shaped responses (one shared drive).
    """
    document = "application/vnd.google-apps.document"
    folder = "application/vnd.google-apps.folder"
    presentation = "application/vnd.google-apps.presentation"

    shared_drive_id = "0AFqcR6obkydtUk9PVA"
    my_drive_root_id = "0AIbBwO2hjeHqUk9PVA"
    test_folder_1_id = "1gqioaHG53jPVeJN5gBpHoO-GWtwiJcLo"
    test_folder_1_1_id = "1J92V9yvVWm_uNHq3CCY4wyG1H9B6iiwO"
    shared_folder_1_id = "1dCOCdPxhTqiB3j3bWrIWM692ZbL8dyjt"

    def owner(display_name, permission_id, *, me):
        # Every sample user shares the same photo link and e-mail domain.
        return {
            "kind": "drive#user",
            "displayName": display_name,
            "photoLink": "https://lh3.googleusercontent.com/a-/photo.png",
            "me": me,
            "permissionId": permission_id,
            "emailAddress": f"{display_name}@arcade.dev",
        }

    def current_user():
        # A fresh dict per call so no two files share the same owner object.
        return owner("one_new_tool_everyday", "00356981722324419750", me=True)

    def entry(
        file_id,
        name,
        mime_type,
        created,
        modified,
        *,
        parent=None,
        drive_id=None,
        owners=None,
        size=None,
    ):
        # Optional keys are omitted entirely (not set to None) when not supplied.
        item = {"id": file_id, "name": name, "mimeType": mime_type}
        if parent is not None:
            item["parents"] = [parent]
        item["createdTime"] = created
        item["modifiedTime"] = modified
        if drive_id is not None:
            item["driveId"] = drive_id
        if owners is not None:
            item["owners"] = owners
        if size is not None:
            item["size"] = size
        return item

    files_list = {
        "files": [
            # Shared Drive 1 files and folders
            entry(
                "19WVyQndQsc0AxxfdrIt5CvDQd6r-BvpqnB8bWZoL7Xk",
                "shared-1-folder-1-doc-1",
                document,
                "2025-02-26T00:28:20.571Z",
                "2025-02-26T00:28:30.773Z",
                parent=shared_folder_1_id,
                drive_id=shared_drive_id,
                size="1024",
            ),
            entry(
                shared_folder_1_id,
                "shared-1-folder-1",
                folder,
                "2025-02-26T00:27:45.526Z",
                "2025-02-26T00:27:45.526Z",
                parent=shared_drive_id,
                drive_id=shared_drive_id,
            ),
            entry(
                "1didt_h-tDjuJ-dmYtHUSyOCPci30K_kSszvg0G3tKBM",
                "shared-1-doc-1",
                document,
                "2025-02-26T00:27:19.287Z",
                "2025-02-26T00:27:26.079Z",
                parent=shared_drive_id,
                drive_id=shared_drive_id,
                size="1024",
            ),
            # My Drive files and folders
            entry(
                "1vB6sv0MD0hYSraYvWU_fcci3GN_-Jf4g-LfyXdG8ZMo",
                "The Birth of MX Engineering",
                document,
                "2025-01-24T06:34:22.305Z",
                "2025-02-25T21:54:30.632Z",
                parent=my_drive_root_id,
                owners=[current_user()],
                size="6634",
            ),
            entry(
                "1wv2dmYo0skJTI59ZIcwH9vm-wt7psMwXTvihuEGeHeI",
                "test document 1.1.1",
                document,
                "2025-02-25T17:59:03.325Z",
                "2025-02-25T17:59:11.445Z",
                parent=test_folder_1_1_id,
                owners=[current_user()],
                size="1024",
            ),
            entry(
                test_folder_1_1_id,
                "test folder 1.1",
                folder,
                "2025-02-25T17:58:58.987Z",
                "2025-02-25T17:58:58.987Z",
                parent=test_folder_1_id,
                owners=[current_user()],
            ),
            entry(
                "1DSmL7d07kjT6b6L-t4JIT06ElUbZ1q0K6_gEpn_UGZ8",
                "test document 1.2",
                document,
                "2025-02-25T17:58:38.628Z",
                "2025-02-25T17:58:46.713Z",
                parent=test_folder_1_id,
                owners=[current_user()],
                size="1024",
            ),
            entry(
                "1Fcxz7HsyO2Zyc-5DTD3zBQnaVrZwD29BP9KD9rPnYfE",
                "test document 1.1",
                document,
                "2025-02-25T17:57:53.850Z",
                "2025-02-25T17:58:28.745Z",
                parent=test_folder_1_id,
                owners=[current_user()],
                size="1024",
            ),
            entry(
                test_folder_1_id,
                "test folder 1",
                folder,
                "2025-02-25T17:57:46.036Z",
                "2025-02-25T17:57:46.036Z",
                parent=my_drive_root_id,
                owners=[current_user()],
            ),
            # Entries without "parents": owned by other users
            entry(
                "16PUe97yGQeOjQgrgd54iCoxzid4SEvu_J33P_ELd5r8",
                "Hello world presentation",
                presentation,
                "2025-02-18T20:48:52.786Z",
                "2025-02-19T23:31:20.483Z",
                owners=[owner("john.doe", "06420661154928749996", me=False)],
                size="15774558",
            ),
            entry(
                "1nG7lSvIyK05N9METPczVJa4iGgE7uoo-A6zpqjpUsDY",
                "Shared doc 1",
                document,
                "2025-02-19T18:51:44.622Z",
                "2025-02-19T19:30:39.773Z",
                owners=[owner("theboss", "11571864250637401873", me=False)],
                size="2700",
            ),
        ],
    }

    drives_get = [
        {
            "id": shared_drive_id,
            "name": "Shared Drive 1",
        }
    ]

    return files_list, drives_get

View file

@ -8,7 +8,7 @@ from arcade.sdk.eval import (
)
import arcade_google
from arcade_google.tools.drive import list_documents
from arcade_google.tools.drive import get_file_tree_structure, list_documents
from arcade_google.tools.models import Corpora, OrderBy
# Evaluation rubric
@ -104,3 +104,112 @@ def drive_eval_suite() -> EvalSuite:
)
return suite
@tool_eval()
def get_file_tree_structure_eval_suite() -> EvalSuite:
    """Build the evaluation suite covering the `get_file_tree_structure` tool.

    Each case uses the prompt as both its name and its user message and expects a
    single call with default arguments, varying only `include_shared_drives`.
    """
    suite = EvalSuite(
        name="Google Drive Tools Evaluation",
        system_message="You are an AI assistant that can manage Google Drive documents using the provided tools.",
        catalog=catalog,
        rubric=rubric,
    )

    # (prompt, expected value of include_shared_drives)
    scenarios = (
        ("get my google drive's file tree structure including shared drives", True),
        ("get my google drive's file tree structure without shared drives", False),
        ("what are the files in the folder 'hello world' in my google drive?", False),
        ("how many files are there in all my google drives, including shared ones?", True),
    )

    # Half of the score rides on include_shared_drives; the rest is split evenly
    # between the four arguments that should stay at their defaults.
    default_fields = (
        "restrict_to_shared_drive_id",
        "include_organization_domain_documents",
        "order_by",
        "limit",
    )

    for prompt, expects_shared_drives in scenarios:
        suite.add_case(
            name=prompt,
            user_message=prompt,
            expected_tool_calls=[
                ExpectedToolCall(
                    func=get_file_tree_structure,
                    args={
                        "restrict_to_shared_drive_id": None,
                        "include_shared_drives": expects_shared_drives,
                        "include_organization_domain_documents": False,
                        "order_by": None,
                        "limit": None,
                    },
                )
            ],
            critics=[
                BinaryCritic(critic_field="include_shared_drives", weight=0.5),
                *(
                    BinaryCritic(critic_field=field_name, weight=0.5 / 4)
                    for field_name in default_fields
                ),
            ],
        )

    return suite

View file

@ -4,7 +4,7 @@ import pytest
from arcade.sdk.errors import ToolExecutionError
from googleapiclient.errors import HttpError
from arcade_google.tools.drive import list_documents
from arcade_google.tools.drive import get_file_tree_structure, list_documents
from arcade_google.tools.models import Corpora, OrderBy
from arcade_google.tools.utils import build_drive_service
@ -115,3 +115,196 @@ async def test_list_documents_with_parameters(mock_context, mock_service):
corpora="user",
supportsAllDrives=False,
)
@pytest.mark.asyncio
async def test_get_file_tree_structure(
    mock_context, mock_service, sample_drive_file_tree_request_responses
):
    """The tool nests the flat `files.list` payload into one tree per drive."""
    files_response, drive_responses = sample_drive_file_tree_request_responses

    list_execute = mock_service.files.return_value.list.return_value.execute
    list_execute.side_effect = [files_response]
    drive_get_execute = mock_service.drives.return_value.get.return_value.execute
    drive_get_execute.side_effect = drive_responses

    result = await get_file_tree_structure(mock_context, include_shared_drives=True)

    def node(file_id, name, kind, created, modified, *, owner=None, size=None, children=None):
        # Build one expected tree entry; optional keys are left out when not supplied.
        expected = {
            "id": file_id,
            "name": name,
            "mimeType": f"application/vnd.google-apps.{kind}",
            "createdTime": created,
            "modifiedTime": modified,
        }
        if owner is not None:
            expected["owners"] = [{"email": f"{owner}@arcade.dev", "name": owner}]
        if size is not None:
            expected["size"] = {"unit": "bytes", "value": size}
        if children is not None:
            expected["children"] = children
        return expected

    me = "one_new_tool_everyday"

    shared_drive = {
        "id": "0AFqcR6obkydtUk9PVA",
        "name": "Shared Drive 1",
        "children": [
            node(
                "1dCOCdPxhTqiB3j3bWrIWM692ZbL8dyjt",
                "shared-1-folder-1",
                "folder",
                "2025-02-26T00:27:45.526Z",
                "2025-02-26T00:27:45.526Z",
                children=[
                    node(
                        "19WVyQndQsc0AxxfdrIt5CvDQd6r-BvpqnB8bWZoL7Xk",
                        "shared-1-folder-1-doc-1",
                        "document",
                        "2025-02-26T00:28:20.571Z",
                        "2025-02-26T00:28:30.773Z",
                        size=1024,
                    )
                ],
            ),
            node(
                "1didt_h-tDjuJ-dmYtHUSyOCPci30K_kSszvg0G3tKBM",
                "shared-1-doc-1",
                "document",
                "2025-02-26T00:27:19.287Z",
                "2025-02-26T00:27:26.079Z",
                size=1024,
            ),
        ],
    }

    test_folder_1_children = [
        node(
            "1J92V9yvVWm_uNHq3CCY4wyG1H9B6iiwO",
            "test folder 1.1",
            "folder",
            "2025-02-25T17:58:58.987Z",
            "2025-02-25T17:58:58.987Z",
            owner=me,
            children=[
                node(
                    "1wv2dmYo0skJTI59ZIcwH9vm-wt7psMwXTvihuEGeHeI",
                    "test document 1.1.1",
                    "document",
                    "2025-02-25T17:59:03.325Z",
                    "2025-02-25T17:59:11.445Z",
                    owner=me,
                    size=1024,
                ),
            ],
        ),
        node(
            "1DSmL7d07kjT6b6L-t4JIT06ElUbZ1q0K6_gEpn_UGZ8",
            "test document 1.2",
            "document",
            "2025-02-25T17:58:38.628Z",
            "2025-02-25T17:58:46.713Z",
            owner=me,
            size=1024,
        ),
        node(
            "1Fcxz7HsyO2Zyc-5DTD3zBQnaVrZwD29BP9KD9rPnYfE",
            "test document 1.1",
            "document",
            "2025-02-25T17:57:53.850Z",
            "2025-02-25T17:58:28.745Z",
            owner=me,
            size=1024,
        ),
    ]

    my_drive = {
        "name": "My Drive",
        "children": [
            node(
                "1vB6sv0MD0hYSraYvWU_fcci3GN_-Jf4g-LfyXdG8ZMo",
                "The Birth of MX Engineering",
                "document",
                "2025-01-24T06:34:22.305Z",
                "2025-02-25T21:54:30.632Z",
                owner=me,
                size=6634,
            ),
            node(
                "1gqioaHG53jPVeJN5gBpHoO-GWtwiJcLo",
                "test folder 1",
                "folder",
                "2025-02-25T17:57:46.036Z",
                "2025-02-25T17:57:46.036Z",
                owner=me,
                children=test_folder_1_children,
            ),
            node(
                "16PUe97yGQeOjQgrgd54iCoxzid4SEvu_J33P_ELd5r8",
                "Hello world presentation",
                "presentation",
                "2025-02-18T20:48:52.786Z",
                "2025-02-19T23:31:20.483Z",
                owner="john.doe",
                size=15774558,
            ),
            node(
                "1nG7lSvIyK05N9METPczVJa4iGgE7uoo-A6zpqjpUsDY",
                "Shared doc 1",
                "document",
                "2025-02-19T18:51:44.622Z",
                "2025-02-19T19:30:39.773Z",
                owner="theboss",
                size=2700,
            ),
        ],
    }

    expected_file_tree = {"drives": [shared_drive, my_drive]}

    assert result == expected_file_tree