From 04bda3cc4513777b9ddaaf9e1f997c17baae0be2 Mon Sep 17 00:00:00 2001 From: Eric Gustin <34000337+EricGustin@users.noreply.github.com> Date: Mon, 24 Mar 2025 08:52:51 -0800 Subject: [PATCH] Google Sheets Tools (#321) | Name | Description | |--------------------------|---------------------------------------------------------------------------------------| | Google.CreateSpreadsheet | Create a new spreadsheet with the provided title and data in its first sheet | | Google.GetSpreadsheet | Get the user entered and formatted data for all sheets in the spreadsheet | | Google.WriteToCell | Write a value to a single cell in a spreadsheet. | ## Google.CreateSpreadsheet This tool can create a new spreadsheet with data in its first sheet. This tool takes in the data as a JSON string. Here's an example input: ``` // Good at large payloads, sparse payloads, and contiguous data payloads. // For example data[1]["D"] represents the value of the cell in the first row in the D column { // All data in row 1 1: { "A": 42, "B": 2, "D":"=A1+B1" }, // All data in row 54 54: { "A": "my string", "QQ": "my far away string" } } ``` The above data format performed better on evals than the other two that I tested: ``` // Performed poorly at sparse data and also at larger amounts of data [ [42, 2, "", "=A1+B1"], [], [], ..., ["my string", "", "", ..., "my far away string"] ] ``` ``` // Good at small payloads and sparse payloads, but very bad at payloads with contiguous data { "A1": 42, "B1": 2, "D1": "=A1+B1", "A54": "my string", "QQ54": "my far away string" } ``` ## Google.GetSpreadsheet Gets the formatted values for all non-empty cells in all sheets of the spreadsheet. The data returned is in a format similar to the `Google.CreateSpreadsheet` tool's `data` input parameter. The difference is that `get_spreadsheet` will return the user-entered value (=A1+B1) and also the formatted value (23.4) for each cell. ## Google.WriteToCell Writes to a single cell.
At this point in time we do not support batch updating a sheet. --- toolkits/google/arcade_google/constants.py | 3 + toolkits/google/arcade_google/enums.py | 0 toolkits/google/arcade_google/models.py | 268 +++++++++ toolkits/google/arcade_google/tools/sheets.py | 144 +++++ toolkits/google/arcade_google/utils.py | 549 +++++++++++++++++- toolkits/google/evals/eval_google_sheets.py | 169 ++++++ toolkits/google/tests/test_sheets_models.py | 84 +++ toolkits/google/tests/test_sheets_utils.py | 542 +++++++++++++++++ 8 files changed, 1757 insertions(+), 2 deletions(-) create mode 100644 toolkits/google/arcade_google/enums.py create mode 100644 toolkits/google/arcade_google/tools/sheets.py create mode 100644 toolkits/google/evals/eval_google_sheets.py create mode 100644 toolkits/google/tests/test_sheets_models.py create mode 100644 toolkits/google/tests/test_sheets_utils.py diff --git a/toolkits/google/arcade_google/constants.py b/toolkits/google/arcade_google/constants.py index 46b3ef2d..c2bff210 100644 --- a/toolkits/google/arcade_google/constants.py +++ b/toolkits/google/arcade_google/constants.py @@ -19,3 +19,6 @@ except ValueError as e: DEFAULT_SEARCH_CONTACTS_LIMIT = 30 + +DEFAULT_SHEET_ROW_COUNT = 1000 +DEFAULT_SHEET_COLUMN_COUNT = 26 diff --git a/toolkits/google/arcade_google/enums.py b/toolkits/google/arcade_google/enums.py new file mode 100644 index 00000000..e69de29b diff --git a/toolkits/google/arcade_google/models.py b/toolkits/google/arcade_google/models.py index c4e20d9f..c80d8b83 100644 --- a/toolkits/google/arcade_google/models.py +++ b/toolkits/google/arcade_google/models.py @@ -1,7 +1,11 @@ +import json from datetime import date, datetime, time, timedelta from enum import Enum +from typing import Optional from zoneinfo import ZoneInfo +from pydantic import BaseModel, field_validator, model_validator + # ---------------------------------------------------------------------------- # # Google Calendar Models and Enums @@ -361,3 +365,267 @@ class 
GmailReplyToWhom(str, Enum): class GmailAction(str, Enum): SEND = "send" DRAFT = "draft" + + +# ---------------------------------------------------------------------------- # +# Google Sheets Models and Enums +# ---------------------------------------------------------------------------- # +class CellErrorType(str, Enum): + """The type of error in a cell + + Implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ErrorType + """ + + ERROR_TYPE_UNSPECIFIED = "ERROR_TYPE_UNSPECIFIED" # The default error type, do not use this. + ERROR = "ERROR" # Corresponds to the #ERROR! error. + NULL_VALUE = "NULL_VALUE" # Corresponds to the #NULL! error. + DIVIDE_BY_ZERO = "DIVIDE_BY_ZERO" # Corresponds to the #DIV/0 error. + VALUE = "VALUE" # Corresponds to the #VALUE! error. + REF = "REF" # Corresponds to the #REF! error. + NAME = "NAME" # Corresponds to the #NAME? error. + NUM = "NUM" # Corresponds to the #NUM! error. + N_A = "N_A" # Corresponds to the #N/A error. + LOADING = "LOADING" # Corresponds to the Loading... state. 
+ + +class CellErrorValue(BaseModel): + """An error in a cell + + Implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ErrorValue + """ + + type: CellErrorType + message: str + + +class CellExtendedValue(BaseModel): + """The kinds of value that a cell in a spreadsheet can have + + Implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue + """ + + numberValue: Optional[float] = None + stringValue: Optional[str] = None + boolValue: Optional[bool] = None + formulaValue: Optional[str] = None + errorValue: Optional["CellErrorValue"] = None + + @model_validator(mode="after") + def check_exactly_one_value(cls, instance): # type: ignore[no-untyped-def] + provided = [v for v in instance.__dict__.values() if v is not None] + if len(provided) != 1: + raise ValueError( + "Exactly one of numberValue, stringValue, boolValue, " + "formulaValue, or errorValue must be set." + ) + return instance + + +class NumberFormatType(str, Enum): + NUMBER = "NUMBER" + PERCENT = "PERCENT" + CURRENCY = "CURRENCY" + + +class NumberFormat(BaseModel): + """The format of a number + + Implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormat + """ + + pattern: str + type: NumberFormatType + + +class CellFormat(BaseModel): + """The format of a cell + + Partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#CellFormat + """ + + numberFormat: NumberFormat + + +class CellData(BaseModel): + """Data about a specific cell + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#CellData + """ + + userEnteredValue: CellExtendedValue + userEnteredFormat: Optional[CellFormat] = None + + +class RowData(BaseModel): + """Data about each cellin a row + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#RowData + 
""" + + values: list[CellData] + + +class GridData(BaseModel): + """Data in the grid + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#GridData + """ + + startRow: int + startColumn: int + rowData: list[RowData] + + +class GridProperties(BaseModel): + """Properties of a grid + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#GridProperties + """ + + rowCount: int + columnCount: int + + +class SheetProperties(BaseModel): + """Properties of a Sheet + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#SheetProperties + """ + + sheetId: int + title: str + gridProperties: Optional[GridProperties] = None + + +class Sheet(BaseModel): + """A Sheet in a spreadsheet + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#Sheet + """ + + properties: SheetProperties + data: Optional[list[GridData]] = None + + +class SpreadsheetProperties(BaseModel): + """Properties of a spreadsheet + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#SpreadsheetProperties + """ + + title: str + + +class Spreadsheet(BaseModel): + """A spreadsheet + + A partial implementation of https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets + """ + + properties: SpreadsheetProperties + sheets: list[Sheet] + + +CellValue = int | float | str | bool + + +class SheetDataInput(BaseModel): + """ + SheetDataInput models the cell data of a spreadsheet in a custom format. + + It is a dictionary mapping row numbers (as ints) to dictionaries that map + column letters (as uppercase strings) to cell values (int, float, str, or bool). + + This model enforces that: + - The outer keys are convertible to int. + - The inner keys are alphabetic strings (normalized to uppercase). 
+ - All cell values are only of type int, float, str, or bool. + + The model automatically serializes (via `json_data()`) + and validates the inner types. + """ + + data: dict[int, dict[str, CellValue]] + + @classmethod + def _parse_json_if_string(cls, value): # type: ignore[no-untyped-def] + """Parses the value if it is a JSON string, otherwise returns it. + + Helper method for when validating the `data` field. + """ + if isinstance(value, str): + try: + return json.loads(value) + except json.JSONDecodeError as e: + raise TypeError(f"Invalid JSON: {e}") + return value + + @classmethod + def _validate_row_key(cls, row_key) -> int: # type: ignore[no-untyped-def] + """Converts the row key to an integer, raising an error if conversion fails. + + Helper method for when validating the `data` field. + """ + try: + return int(row_key) + except (ValueError, TypeError): + raise TypeError(f"Row key '{row_key}' is not convertible to int.") + + @classmethod + def _validate_inner_cells(cls, cells, row_int: int) -> dict: # type: ignore[no-untyped-def] + """Validates that 'cells' is a dict mapping column letters to valid cell values + and normalizes the keys. + + Helper method for when validating the `data` field. + """ + if not isinstance(cells, dict): + raise TypeError( + f"Value for row '{row_int}' must be a dict mapping column letters to cell values." + ) + new_inner = {} + for col_key, cell_value in cells.items(): + if not isinstance(col_key, str): + raise TypeError(f"Column key '{col_key}' must be a string.") + col_string = col_key.upper() + if not col_string.isalpha(): + raise TypeError(f"Column key '{col_key}' is invalid. Must be alphabetic.") + if not isinstance(cell_value, (int, float, str, bool)): + raise TypeError( + f"Cell value for {col_string}{row_int} must be an int, float, str, or bool." 
+ ) + new_inner[col_string] = cell_value + return new_inner + + @field_validator("data", mode="before") + @classmethod + def validate_and_convert_keys(cls, value): # type: ignore[no-untyped-def] + """ + Validates data when SheetDataInput is instantiated and converts it to the correct format. + Uses private helper methods to parse JSON, validate row keys, and validate inner cell data. + """ + if value is None: + return {} + + value = cls._parse_json_if_string(value) + if isinstance(value, dict): + new_value = {} + for row_key, cells in value.items(): + row_int = cls._validate_row_key(row_key) + inner_cells = cls._validate_inner_cells(cells, row_int) + new_value[row_int] = inner_cells + return new_value + + raise TypeError("data must be a dict or a valid JSON string representing a dict") + + def json_data(self) -> str: + """ + Serialize the sheet data to a JSON string. + """ + return json.dumps(self.data) + + @classmethod + def from_json(cls, json_str: str) -> "SheetDataInput": + """ + Create a SheetData instance from a JSON string. 
+ """ + return cls.model_validate_json(json_str) diff --git a/toolkits/google/arcade_google/tools/sheets.py b/toolkits/google/arcade_google/tools/sheets.py new file mode 100644 index 00000000..555bcb59 --- /dev/null +++ b/toolkits/google/arcade_google/tools/sheets.py @@ -0,0 +1,144 @@ +from typing import Annotated, Optional + +from arcade.sdk import ToolContext, tool +from arcade.sdk.auth import Google +from arcade.sdk.errors import RetryableToolError + +from arcade_google.models import ( + SheetDataInput, + Spreadsheet, + SpreadsheetProperties, +) +from arcade_google.utils import ( + build_sheets_service, + create_sheet, + parse_get_spreadsheet_response, + parse_write_to_cell_response, + validate_write_to_cell_params, +) + + +@tool( + requires_auth=Google( + scopes=["https://www.googleapis.com/auth/drive.file"], + ) +) +def create_spreadsheet( + context: ToolContext, + title: Annotated[str, "The title of the new spreadsheet"] = "Untitled spreadsheet", + data: Annotated[ + Optional[str], + "The data to write to the spreadsheet. A JSON string " + "(property names enclosed in double quotes) representing a dictionary that " + "maps row numbers to dictionaries that map column letters to cell values. " + "For example, data[23]['C'] would be the value of the cell in row 23, column C. 
" + "Type hint: dict[int, dict[str, Union[int, float, str, bool]]]", + ] = None, +) -> Annotated[dict, "The created spreadsheet's id and title"]: + """Create a new spreadsheet with the provided title and data in its first sheet + + Returns the newly created spreadsheet's id and title + """ + service = build_sheets_service(context.get_auth_token_or_empty()) + + try: + sheet_data = SheetDataInput(data=data) # type: ignore[arg-type] + except Exception as e: + msg = "Invalid JSON or unexpected data format for parameter `data`" + raise RetryableToolError( + message=msg, + additional_prompt_content=f"{msg}: {e}", + retry_after_ms=100, + ) + + spreadsheet = Spreadsheet( + properties=SpreadsheetProperties(title=title), + sheets=[create_sheet(sheet_data)], + ) + + body = spreadsheet.model_dump() + + response = ( + service.spreadsheets() + .create(body=body, fields="spreadsheetId,spreadsheetUrl,properties/title") + .execute() + ) + + return { + "title": response["properties"]["title"], + "spreadsheetId": response["spreadsheetId"], + "spreadsheetUrl": response["spreadsheetUrl"], + } + + +@tool( + requires_auth=Google( + scopes=["https://www.googleapis.com/auth/drive.file"], + ) +) +async def get_spreadsheet( + context: ToolContext, + spreadsheet_id: Annotated[str, "The id of the spreadsheet to get"], +) -> Annotated[ + dict, + "The spreadsheet properties and data for all sheets in the spreadsheet", +]: + """ + Get the user entered values and formatted values for all cells in all sheets in the spreadsheet + along with the spreadsheet's properties + """ + service = build_sheets_service(context.get_auth_token_or_empty()) + response = ( + service.spreadsheets() + .get( + spreadsheetId=spreadsheet_id, + includeGridData=True, + fields="spreadsheetId,spreadsheetUrl,properties/title,sheets/properties,sheets/data/rowData/values/userEnteredValue,sheets/data/rowData/values/formattedValue,sheets/data/rowData/values/effectiveValue", + ) + .execute() + ) + return 
parse_get_spreadsheet_response(response) + + +@tool( + requires_auth=Google( + scopes=["https://www.googleapis.com/auth/drive.file"], + ) +) +def write_to_cell( + context: ToolContext, + spreadsheet_id: Annotated[str, "The id of the spreadsheet to write to"], + column: Annotated[str, "The column string to write to. For example, 'A', 'F', or 'AZ'"], + row: Annotated[int, "The row number to write to"], + value: Annotated[str, "The value to write to the cell"], + sheet_name: Annotated[ + str, "The name of the sheet to write to. Defaults to 'Sheet1'" + ] = "Sheet1", +) -> Annotated[dict, "The status of the operation"]: + """ + Write a value to a single cell in a spreadsheet. + """ + service = build_sheets_service(context.get_auth_token_or_empty()) + validate_write_to_cell_params(service, spreadsheet_id, sheet_name, column, row) + + range_ = f"'{sheet_name}'!{column.upper()}{row}" + body = { + "range": range_, + "majorDimension": "ROWS", + "values": [[value]], + } + + sheet_properties = ( + service.spreadsheets() + .values() + .update( + spreadsheetId=spreadsheet_id, + range=range_, + valueInputOption="USER_ENTERED", + includeValuesInResponse=True, + body=body, + ) + .execute() + ) + + return parse_write_to_cell_response(sheet_properties) diff --git a/toolkits/google/arcade_google/utils.py b/toolkits/google/arcade_google/utils.py index f996282c..8094897c 100644 --- a/toolkits/google/arcade_google/utils.py +++ b/toolkits/google/arcade_google/utils.py @@ -9,13 +9,37 @@ from typing import Any, Optional, Union, cast from zoneinfo import ZoneInfo from arcade.sdk import ToolContext +from arcade.sdk.errors import RetryableToolError, ToolExecutionError from bs4 import BeautifulSoup from google.oauth2.credentials import Credentials from googleapiclient.discovery import Resource, build -from arcade_google.constants import DEFAULT_SEARCH_CONTACTS_LIMIT +from arcade_google.constants import ( + DEFAULT_SEARCH_CONTACTS_LIMIT, + DEFAULT_SHEET_COLUMN_COUNT, + DEFAULT_SHEET_ROW_COUNT, 
+) from arcade_google.exceptions import GmailToolError, GoogleServiceError -from arcade_google.models import Corpora, Day, GmailAction, GmailReplyToWhom, OrderBy, TimeSlot +from arcade_google.models import ( + CellData, + CellExtendedValue, + CellFormat, + CellValue, + Corpora, + Day, + GmailAction, + GmailReplyToWhom, + GridData, + GridProperties, + NumberFormat, + NumberFormatType, + OrderBy, + RowData, + Sheet, + SheetDataInput, + SheetProperties, + TimeSlot, +) ## Set up basic configuration for logging to the console with DEBUG level and a specific format. logging.basicConfig( @@ -805,3 +829,524 @@ def search_contacts(service: Any, query: str, limit: Optional[int]) -> list[dict ) return cast(list[dict[str, Any]], response.get("results", [])) + + +# ---------------------------------------------------------------- +# Sheets utils +# ---------------------------------------------------------------- + + +def build_sheets_service(auth_token: Optional[str]) -> Resource: # type: ignore[no-any-unimported] + """ + Build a Sheets service object. + """ + auth_token = auth_token or "" + return build("sheets", "v4", credentials=Credentials(auth_token)) + + +def col_to_index(col: str) -> int: + """Convert a sheet's column string to a 0-indexed column index + + Args: + col (str): The column string to convert. e.g., "A", "AZ", "QED" + + Returns: + int: The 0-indexed column index. + """ + result = 0 + for char in col.upper(): + result = result * 26 + (ord(char) - ord("A") + 1) + return result - 1 + + +def index_to_col(index: int) -> str: + """Convert a 0-indexed column index to its corresponding column string + + Args: + index (int): The 0-indexed column index to convert. + + Returns: + str: The column string. 
e.g., "A", "AZ", "QED" + """ + result = "" + index += 1 + while index > 0: + index, rem = divmod(index - 1, 26) + result = chr(rem + ord("A")) + result + return result + + +def is_col_greater(col1: str, col2: str) -> bool: + """Determine if col1 represents a column that comes after col2 in a sheet + + This comparison is based on: + 1. The length of the column string (longer means greater). + 2. Lexicographical comparison if both strings are the same length. + + Args: + col1 (str): The first column string to compare. + col2 (str): The second column string to compare. + + Returns: + bool: True if col1 comes after col2, False otherwise. + """ + if len(col1) != len(col2): + return len(col1) > len(col2) + return col1.upper() > col2.upper() + + +def compute_sheet_data_dimensions( + sheet_data_input: SheetDataInput, +) -> tuple[tuple[int, int], tuple[int, int]]: + """ + Compute the dimensions of a sheet based on the data provided. + + Args: + sheet_data_input (SheetDataInput): + The data to compute the dimensions of. + + Returns: + tuple[tuple[int, int], tuple[int, int]]: The dimensions of the sheet. The first tuple + contains the row range (start, end) and the second tuple contains the column range + (start, end). 
+ """ + max_row = 0 + min_row = 10_000_000 # max number of cells in a sheet + max_col_str = None + min_col_str = None + + for key, row in sheet_data_input.data.items(): + try: + row_num = int(key) + except ValueError: + continue + if row_num > max_row: + max_row = row_num + if row_num < min_row: + min_row = row_num + + if isinstance(row, dict): + for col in row: + # Update max column string + if max_col_str is None or is_col_greater(col, max_col_str): + max_col_str = col + # Update min column string + if min_col_str is None or is_col_greater(min_col_str, col): + min_col_str = col + + max_col_index = col_to_index(max_col_str) if max_col_str is not None else -1 + min_col_index = col_to_index(min_col_str) if min_col_str is not None else 0 + + return (min_row, max_row), (min_col_index, max_col_index) + + +def create_sheet(sheet_data_input: SheetDataInput) -> Sheet: + """Create a Google Sheet from a dictionary of data. + + Args: + sheet_data_input (SheetDataInput): The data to create the sheet from. + + Returns: + Sheet: The created sheet. + """ + (_, max_row), (min_col_index, max_col_index) = compute_sheet_data_dimensions(sheet_data_input) + sheet_data = create_sheet_data(sheet_data_input, min_col_index, max_col_index) + sheet_properties = create_sheet_properties( + row_count=max(DEFAULT_SHEET_ROW_COUNT, max_row), + column_count=max(DEFAULT_SHEET_COLUMN_COUNT, max_col_index + 1), + ) + + return Sheet(properties=sheet_properties, data=sheet_data) + + +def create_sheet_properties( + sheet_id: int = 1, + title: str = "Sheet1", + row_count: int = DEFAULT_SHEET_ROW_COUNT, + column_count: int = DEFAULT_SHEET_COLUMN_COUNT, +) -> SheetProperties: + """Create a SheetProperties object + + Args: + sheet_id (int): The ID of the sheet. + title (str): The title of the sheet. + row_count (int): The number of rows in the sheet. + column_count (int): The number of columns in the sheet. + + Returns: + SheetProperties: The created sheet properties object. 
+ """ + return SheetProperties( + sheetId=sheet_id, + title=title, + gridProperties=GridProperties(rowCount=row_count, columnCount=column_count), + ) + + +def group_contiguous_rows(row_numbers: list[int]) -> list[list[int]]: + """Groups a sorted list of row numbers into contiguous groups + + A contiguous group is a list of row numbers that are consecutive integers. + For example, [1,2,3,5,6] is converted to [[1,2,3],[5,6]]. + + Args: + row_numbers (list[int]): The list of row numbers to group. + + Returns: + list[list[int]]: The grouped row numbers. + """ + if not row_numbers: + return [] + groups = [] + current_group = [row_numbers[0]] + for r in row_numbers[1:]: + if r == current_group[-1] + 1: + current_group.append(r) + else: + groups.append(current_group) + current_group = [r] + groups.append(current_group) + return groups + + +def create_cell_data(cell_value: CellValue) -> CellData: + """ + Create a CellData object based on the type of cell_value. + """ + if isinstance(cell_value, bool): + return _create_bool_cell(cell_value) + elif isinstance(cell_value, int): + return _create_int_cell(cell_value) + elif isinstance(cell_value, float): + return _create_float_cell(cell_value) + elif isinstance(cell_value, str): + return _create_string_cell(cell_value) + + +def _create_formula_cell(cell_value: str) -> CellData: + cell_val = CellExtendedValue(formulaValue=cell_value) + return CellData(userEnteredValue=cell_val) + + +def _create_currency_cell(cell_value: str) -> CellData: + value_without_symbol = cell_value[1:] + try: + num_value = int(value_without_symbol) + cell_format = CellFormat( + numberFormat=NumberFormat(type=NumberFormatType.CURRENCY, pattern="$#,##0") + ) + cell_val = CellExtendedValue(numberValue=num_value) + return CellData(userEnteredValue=cell_val, userEnteredFormat=cell_format) + except ValueError: + try: + num_value = float(value_without_symbol) # type: ignore[assignment] + cell_format = CellFormat( + 
numberFormat=NumberFormat(type=NumberFormatType.CURRENCY, pattern="$#,##0.00") + ) + cell_val = CellExtendedValue(numberValue=num_value) + return CellData(userEnteredValue=cell_val, userEnteredFormat=cell_format) + except ValueError: + return CellData(userEnteredValue=CellExtendedValue(stringValue=cell_value)) + + +def _create_percent_cell(cell_value: str) -> CellData: + try: + num_value = float(cell_value[:-1].strip()) + cell_format = CellFormat( + numberFormat=NumberFormat(type=NumberFormatType.PERCENT, pattern="0.00%") + ) + cell_val = CellExtendedValue(numberValue=num_value) + return CellData(userEnteredValue=cell_val, userEnteredFormat=cell_format) + except ValueError: + return CellData(userEnteredValue=CellExtendedValue(stringValue=cell_value)) + + +def _create_bool_cell(cell_value: bool) -> CellData: + return CellData(userEnteredValue=CellExtendedValue(boolValue=cell_value)) + + +def _create_int_cell(cell_value: int) -> CellData: + cell_format = CellFormat( + numberFormat=NumberFormat(type=NumberFormatType.NUMBER, pattern="#,##0") + ) + return CellData( + userEnteredValue=CellExtendedValue(numberValue=cell_value), userEnteredFormat=cell_format + ) + + +def _create_float_cell(cell_value: float) -> CellData: + cell_format = CellFormat( + numberFormat=NumberFormat(type=NumberFormatType.NUMBER, pattern="#,##0.00") + ) + return CellData( + userEnteredValue=CellExtendedValue(numberValue=cell_value), userEnteredFormat=cell_format + ) + + +def _create_string_cell(cell_value: str) -> CellData: + if cell_value.startswith("="): + return _create_formula_cell(cell_value) + elif cell_value.startswith("$") and len(cell_value) > 1: + return _create_currency_cell(cell_value) + elif cell_value.endswith("%") and len(cell_value) > 1: + return _create_percent_cell(cell_value) + + return CellData(userEnteredValue=CellExtendedValue(stringValue=cell_value)) + + +def create_row_data( + row_data: dict[str, CellValue], min_col_index: int, max_col_index: int +) -> RowData: + 
"""Constructs RowData for a single row using the provided row_data. + + Args: + row_data (dict[str, CellValue]): The data to create the row from. + min_col_index (int): The minimum column index from the SheetDataInput. + max_col_index (int): The maximum column index from the SheetDataInput. + """ + row_cells = [] + for col_idx in range(min_col_index, max_col_index + 1): + col_letter = index_to_col(col_idx) + if col_letter in row_data: + cell_data = create_cell_data(row_data[col_letter]) + else: + cell_data = CellData(userEnteredValue=CellExtendedValue(stringValue="")) + row_cells.append(cell_data) + return RowData(values=row_cells) + + +def create_sheet_data( + sheet_data_input: SheetDataInput, + min_col_index: int, + max_col_index: int, +) -> list[GridData]: + """Create grid data from SheetDataInput by grouping contiguous rows and processing cells. + + Args: + sheet_data_input (SheetDataInput): The data to create the sheet from. + min_col_index (int): The minimum column index from the SheetDataInput. + max_col_index (int): The maximum column index from the SheetDataInput. + + Returns: + list[GridData]: The created grid data. + """ + row_numbers = list(sheet_data_input.data.keys()) + if not row_numbers: + return [] + + sorted_rows = sorted(row_numbers) + groups = group_contiguous_rows(sorted_rows) + + sheet_data = [] + for group in groups: + rows_data = [] + for r in group: + current_row_data = sheet_data_input.data.get(r, {}) + row = create_row_data(current_row_data, min_col_index, max_col_index) + rows_data.append(row) + grid_data = GridData( + startRow=group[0] - 1, # convert to 0-indexed + startColumn=min_col_index, + rowData=rows_data, + ) + sheet_data.append(grid_data) + + return sheet_data + + +def parse_get_spreadsheet_response(api_response: dict) -> dict: + """ + Parse the get spreadsheet Google Sheets API response into a structured dictionary. 
+ """ + properties = api_response.get("properties", {}) + sheets = [parse_sheet(sheet) for sheet in api_response.get("sheets", [])] + + return { + "title": properties.get("title", ""), + "spreadsheetId": api_response.get("spreadsheetId", ""), + "spreadsheetUrl": api_response.get("spreadsheetUrl", ""), + "sheets": sheets, + } + + +def parse_sheet(api_sheet: dict) -> dict: + """ + Parse an individual sheet's data from the Google Sheets 'get spreadsheet' + API response into a structured dictionary. + """ + props = api_sheet.get("properties", {}) + grid_props = props.get("gridProperties", {}) + cell_data = convert_api_grid_data_to_dict(api_sheet.get("data", [])) + + return { + "sheetId": props.get("sheetId"), + "title": props.get("title", ""), + "rowCount": grid_props.get("rowCount", 0), + "columnCount": grid_props.get("columnCount", 0), + "data": cell_data, + } + + +def extract_user_entered_cell_value(cell: dict) -> Any: + """ + Extract the user entered value from a cell's 'userEnteredValue'. + + Args: + cell (dict): A cell dictionary from the grid data. + + Returns: + The extracted value if present, otherwise None. + """ + user_val = cell.get("userEnteredValue", {}) + for key in ["stringValue", "numberValue", "boolValue", "formulaValue"]: + if key in user_val: + return user_val[key] + + return "" + + +def process_row(row: dict, start_column_index: int) -> dict: + """ + Process a single row from grid data, converting non-empty cells into a dictionary + that maps column letters to cell values. + + Args: + row (dict): A row from the grid data. + start_column_index (int): The starting column index for this row. + + Returns: + dict: A mapping of column letters to cell values for non-empty cells. 
def convert_api_grid_data_to_dict(grids: list[dict]) -> dict:
    """
    Convert a list of grid data dictionaries from the 'get spreadsheet' API
    response into a structured cell dictionary.

    The returned dictionary maps 1-based row numbers to sub-dictionaries that map
    column letters (e.g., 'A', 'B', etc.) to the data of the corresponding
    non-empty cell, as produced by ``process_row``.

    Args:
        grids (list[dict]): The list of grid data dictionaries from the API. Each grid
            may carry "startRow"/"startColumn" offsets (treated as 0 when absent)
            and a "rowData" list.

    Returns:
        dict: A dictionary mapping row numbers to dictionaries of column letter/value pairs,
            sorted by row number. Only includes non-empty rows and non-empty cells.
    """
    result: dict[int, dict] = {}
    for grid in grids:
        start_row = grid.get("startRow", 0)
        start_column = grid.get("startColumn", 0)

        # startRow is zero-based while the returned row numbers are 1-based.
        for row_number, row in enumerate(grid.get("rowData", []), start=start_row + 1):
            row_data = process_row(row, start_column)
            if row_data:
                # Merge instead of overwrite so that two grids covering different
                # columns of the same row do not discard each other's cells.
                result.setdefault(row_number, {}).update(row_data)

    return dict(sorted(result.items()))


def validate_write_to_cell_params(  # type: ignore[no-any-unimported]
    service: Resource,
    spreadsheet_id: str,
    sheet_name: str,
    column: str,
    row: int,
) -> None:
    """Validates the input parameters for the write to cell tool.

    The row and column bounds are checked against the grid size of the sheet
    named ``sheet_name`` (not against the first sheet of the spreadsheet).

    Args:
        service (Resource): The Google Sheets service.
        spreadsheet_id (str): The ID of the spreadsheet provided to the tool.
        sheet_name (str): The name of the sheet provided to the tool.
        column (str): The column to write to provided to the tool.
        row (int): The row to write to provided to the tool.

    Raises:
        RetryableToolError:
            If the sheet name is not found in the spreadsheet
        ToolExecutionError:
            If the column is not alphabetical
            If the row is not a positive number
            If the row is out of bounds for the sheet
            If the column is out of bounds for the sheet
    """
    # isalpha() alone also accepts non-ASCII letters, which are not valid column names.
    if not (column.isascii() and column.isalpha()):
        raise ToolExecutionError(
            message=(
                f"Invalid column name {column}. "
                "It must be a non-empty string containing only letters"
            ),
        )

    if row < 1:
        raise ToolExecutionError(
            message=(f"Invalid row number {row}. It must be a positive integer greater than 0."),
        )

    # Only titles and grid sizes are needed. includeGridData is not passed because
    # the API ignores it whenever a field mask is set.
    spreadsheet = (
        service.spreadsheets()
        .get(
            spreadsheetId=spreadsheet_id,
            fields="sheets/properties/title,sheets/properties/gridProperties/rowCount,sheets/properties/gridProperties/columnCount",
        )
        .execute()
    )
    sheets = spreadsheet["sheets"]
    sheet_names = [sheet["properties"]["title"] for sheet in sheets]

    if sheet_name not in sheet_names:
        raise RetryableToolError(
            message=f"Sheet name {sheet_name} not found in spreadsheet with id {spreadsheet_id}",
            additional_prompt_content=f"Sheet names in the spreadsheet: {sheet_names}",
            retry_after_ms=100,
        )

    # Use the dimensions of the requested sheet; sheets in one spreadsheet can
    # have different sizes.
    target_sheet = sheets[sheet_names.index(sheet_name)]
    # NOTE(review): a sheet without gridProperties (presumably a non-grid sheet such as a
    # chart sheet) is treated as having no cells, so every write is reported out of bounds.
    grid_properties = target_sheet["properties"].get("gridProperties", {})
    sheet_row_count = grid_properties.get("rowCount", 0)
    sheet_column_count = grid_properties.get("columnCount", 0)

    if row > sheet_row_count:
        raise ToolExecutionError(
            message=(
                f"Row {row} is out of bounds for sheet {sheet_name} "
                f"in spreadsheet with id {spreadsheet_id}. "
                f"Sheet only has {sheet_row_count} rows which is less than the requested row {row}"
            )
        )

    # col_to_index is zero-based ("A" -> 0), so the last valid index is columnCount - 1.
    if col_to_index(column) >= sheet_column_count:
        raise ToolExecutionError(
            message=(
                f"Column {column} is out of bounds for sheet {sheet_name} "
                f"in spreadsheet with id {spreadsheet_id}. "
                f"Sheet only has {sheet_column_count} columns which "
                f"is less than the requested column {column}"
            )
        )


def parse_write_to_cell_response(response: dict) -> dict:
    """Reduce a 'values update' API response to the fields returned by the tool.

    Args:
        response (dict): The API response. Must contain "spreadsheetId" and an
            "updatedData" object with an A1-notation "range" and a "values" grid.

    Returns:
        dict: The spreadsheet id, the sheet title and cell reference taken from the
            updated range, and the value of the first updated cell.
    """
    updated_data = response["updatedData"]
    # Split on the LAST "!": a cell reference never contains one, but a (quoted)
    # sheet title may, e.g. "'Q1!Budget'!C7".
    sheet_title, _, updated_cell = updated_data["range"].rpartition("!")
    return {
        "spreadsheetId": response["spreadsheetId"],
        "sheetTitle": sheet_title,
        "updatedCell": updated_cell,
        "value": updated_data["values"][0][0],
    }
38 oscarw@example.com 90 Male Atlanta USA 2023-10-15 +Paula Green 32 paulag@example.com 93 Female Boston USA 2023-10-20 +Quentin Brown 29 quentinb@example.com 81 Male Denver USA 2023-10-25 +Rachel Blue 24 rachelb@example.com 79 Female Orlando USA 2023-10-30 +Steve Red 36 stever@example.com 88 Male Las Vegas USA 2023-11-01 +Tina Yellow 30 tinay@example.com 85 Female Portland USA 2023-11-05 +Ursula Pink 27 ursulap@example.com 82 Female San Francisco USA 2023-11-10 +Victor Grey 41 victorg@example.com 91 Male Charlotte USA 2023-11-15 +Wendy Black 34 wendyb@example.com 89 Female Detroit USA 2023-11-20 +Xander White 29 xanderw@example.com 86 Male Indianapolis USA 2023-11-25 +Yvonne Green 25 yvonnag@example.com 83 Female Columbus USA 2023-11-30 +Zachary Blue 37 zacharyb@example.com 90 Male Jacksonville USA 2023-12-01 +Alice Brown 28 aliceb@example.com 80 Female Memphis USA 2023-12-05 +Brian Black 39 brianb@example.com 92 Male Nashville USA 2023-12-10 +Cathy Green 31 cathyg@example.com 84 Female Virginia Beach USA 2023-12-15 +Daniel White 30 danielw@example.com 88 Male Atlanta USA 2023-12-20 +Eva Red 26 evar@example.com 81 Female New Orleans USA 2023-12-25 +Frankie Grey 35 frankieg@example.com 90 Male San Antonio USA 2023-12-30 +Gina Blue 29 ginab@example.com 87 Female San Diego USA 2024-01-01 +Henry Black 42 henryb@example.com 93 Male Philadelphia USA 2024-01-05 +Isla Green 24 islag@example.com 79 Female Chicago USA 2024-01-10 +Jack White 33 jackw@example.com 85 Male Los Angeles USA 2024-01-15 +Kathy Red 31 kathyr@example.com 82 Female Miami USA 2024-01-20 +Liam Grey 36 liamg@example.com 89 Male Seattle USA 2024-01-25 +Mia Black 27 miab@example.com 80 Female Denver USA 2024-01-30 +Nate Green 30 nateg@example.com 88 Male Orlando USA 2024-02-01 +- (empty row) +- (empty row) +- (empty row) +100, 300, 234, 399, 5039, 2345, 23526, 123, 54, 234, 54, 23, 12, 57, 1324, (the formula for sum of everything to the left) +- (empty row) +- (empty row) +- (empty row) +- (empty row) +- 
@tool_eval()
def create_spreadsheet_eval() -> EvalSuite:
    """Create an evaluation suite for Google Sheets create_spreadsheet tool.

    Two cases are registered: one large, mostly contiguous payload described by
    ``sheet_content_prompt`` and one very sparse payload. The expected ``data``
    arguments are JSON strings keyed by row number, then by column letter.
    """
    import json  # function-scope import: only needed to serialize the expected payloads

    column_letters = "ABCDEFGHIJKLMNOP"
    header = ("name", "age", "email", "score", "gender", "city", "country", "registration_date")
    # Rows 2-39 of the prompt, in order.
    people = [
        ("John Doe", 28, "johndoe@example.com", 85, "Male", "New York", "USA", "2023-01-15"),
        ("Jane Smith", 34, "janesmith@example.com", 92, "Female", "Los Angeles", "USA", "2023-02-20"),
        ("Alice Johnson", 22, "alicej@example.com", 78, "Female", "Chicago", "USA", "2023-03-10"),
        ("Bob Brown", 45, "bobbrown@example.com", 88, "Male", "Houston", "USA", "2023-04-05"),
        ("Charlie Davis", 30, "charlied@example.com", 95, "Male", "Phoenix", "USA", "2023-05-12"),
        ("Eve White", 27, "evewhite@example.com", 82, "Female", "Philadelphia", "USA", "2023-06-18"),
        ("Frank Black", 40, "frankb@example.com", 90, "Male", "San Antonio", "USA", "2023-07-25"),
        ("Grace Green", 29, "graceg@example.com", 76, "Female", "Dallas", "USA", "2023-08-30"),
        ("Hank Blue", 35, "hankb@example.com", 89, "Male", "San Diego", "USA", "2023-09-15"),
        ("Ivy Red", 31, "ivyred@example.com", 91, "Female", "San Jose", "USA", "2023-10-01"),
        ("Michael Grey", 33, "michaelg@example.com", 87, "Male", "Seattle", "USA", "2023-10-05"),
        ("Nina Black", 26, "ninab@example.com", 84, "Female", "Miami", "USA", "2023-10-10"),
        ("Oscar White", 38, "oscarw@example.com", 90, "Male", "Atlanta", "USA", "2023-10-15"),
        ("Paula Green", 32, "paulag@example.com", 93, "Female", "Boston", "USA", "2023-10-20"),
        ("Quentin Brown", 29, "quentinb@example.com", 81, "Male", "Denver", "USA", "2023-10-25"),
        ("Rachel Blue", 24, "rachelb@example.com", 79, "Female", "Orlando", "USA", "2023-10-30"),
        ("Steve Red", 36, "stever@example.com", 88, "Male", "Las Vegas", "USA", "2023-11-01"),
        ("Tina Yellow", 30, "tinay@example.com", 85, "Female", "Portland", "USA", "2023-11-05"),
        ("Ursula Pink", 27, "ursulap@example.com", 82, "Female", "San Francisco", "USA", "2023-11-10"),
        ("Victor Grey", 41, "victorg@example.com", 91, "Male", "Charlotte", "USA", "2023-11-15"),
        ("Wendy Black", 34, "wendyb@example.com", 89, "Female", "Detroit", "USA", "2023-11-20"),
        ("Xander White", 29, "xanderw@example.com", 86, "Male", "Indianapolis", "USA", "2023-11-25"),
        ("Yvonne Green", 25, "yvonnag@example.com", 83, "Female", "Columbus", "USA", "2023-11-30"),
        ("Zachary Blue", 37, "zacharyb@example.com", 90, "Male", "Jacksonville", "USA", "2023-12-01"),
        ("Alice Brown", 28, "aliceb@example.com", 80, "Female", "Memphis", "USA", "2023-12-05"),
        ("Brian Black", 39, "brianb@example.com", 92, "Male", "Nashville", "USA", "2023-12-10"),
        ("Cathy Green", 31, "cathyg@example.com", 84, "Female", "Virginia Beach", "USA", "2023-12-15"),
        ("Daniel White", 30, "danielw@example.com", 88, "Male", "Atlanta", "USA", "2023-12-20"),
        ("Eva Red", 26, "evar@example.com", 81, "Female", "New Orleans", "USA", "2023-12-25"),
        ("Frankie Grey", 35, "frankieg@example.com", 90, "Male", "San Antonio", "USA", "2023-12-30"),
        ("Gina Blue", 29, "ginab@example.com", 87, "Female", "San Diego", "USA", "2024-01-01"),
        ("Henry Black", 42, "henryb@example.com", 93, "Male", "Philadelphia", "USA", "2024-01-05"),
        ("Isla Green", 24, "islag@example.com", 79, "Female", "Chicago", "USA", "2024-01-10"),
        ("Jack White", 33, "jackw@example.com", 85, "Male", "Los Angeles", "USA", "2024-01-15"),
        ("Kathy Red", 31, "kathyr@example.com", 82, "Female", "Miami", "USA", "2024-01-20"),
        ("Liam Grey", 36, "liamg@example.com", 89, "Male", "Seattle", "USA", "2024-01-25"),
        ("Mia Black", 27, "miab@example.com", 80, "Female", "Denver", "USA", "2024-01-30"),
        ("Nate Green", 30, "nateg@example.com", 88, "Male", "Orlando", "USA", "2024-02-01"),
    ]
    # The two numeric rows of the prompt (columns A-O). These must match the prompt
    # exactly, otherwise a correct tool call is scored against wrong expectations.
    numbers_row_43 = (100, 300, 234, 399, 5039, 2345, 23526, 123, 54, 234, 54, 23, 12, 57, 1324)
    numbers_row_60 = (456, 234, 234, 399, 234, 1234, 23526, 123, 54, 234, 4567, 23, 12, 234, 1324)

    # Rows 1-60 in order; rows the prompt marks as empty stay as empty dicts.
    large_rows: dict[str, dict] = {str(row_number): {} for row_number in range(1, 61)}
    large_rows["1"] = dict(zip(column_letters, header))
    for row_number, person in enumerate(people, start=2):
        large_rows[str(row_number)] = dict(zip(column_letters, person))
    # Column P holds the requested "sum of everything to the left" as an actual formula
    # rather than the literal instruction text from the prompt.
    large_rows["43"] = dict(zip(column_letters, (*numbers_row_43, "=SUM(A43:O43)")))
    large_rows["60"] = dict(zip(column_letters, (*numbers_row_60, "=SUM(A60:O60)")))
    sheet_content_expected1 = json.dumps(large_rows)

    # Row 1 and row 3782 are sibling rows; 3782 + (1..5) gives the cell values.
    sheet_content_sparse_expected = json.dumps({
        "1": {"AA": "=SUM(A1,A2,A3)"},
        "3782": {"A": 3783, "D": 3784, "AAZ": 3785, "ZZFS": 3786, "CA": 3787},
    })

    suite = EvalSuite(
        name="Google Sheets Tools Evaluation",
        system_message="You are an AI assistant that can manage Google Sheets using the provided tools.",
        catalog=catalog,
        rubric=rubric,
    )

    suite.add_case(
        name="Create a spreadsheet from large data payload",
        user_message=f"Create a spreadsheet named 'Data' with the following content:\n{sheet_content_prompt}",
        expected_tool_calls=[
            ExpectedToolCall(
                func=create_spreadsheet,
                args={
                    "title": "Data",
                    "data": sheet_content_expected1,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="title", weight=0.1),
            SimilarityCritic(critic_field="data", weight=0.9, similarity_threshold=0.99),
        ],
    )

    suite.add_case(
        name="Create a spreadsheet from sparse data payload",
        user_message="Create a spreadsheet named 'Sparse Data' that fills the 27th column in the first row with the formula that sums A1, A2, and A3 cells. The 3782nd row should have its A, D, AAZ, ZZFS, and CA columns filled with the numbers 1, 2, 3, 4, and 5, respectively, summed with its row number.",
        expected_tool_calls=[
            ExpectedToolCall(
                func=create_spreadsheet,
                args={
                    "title": "Sparse Data",
                    "data": sheet_content_sparse_expected,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="title", weight=0.1),
            SimilarityCritic(critic_field="data", weight=0.9, similarity_threshold=0.95),
        ],
    )

    return suite


@tool_eval()
def get_spreadsheet_eval() -> EvalSuite:
    """Create an evaluation suite for Google Sheets get_spreadsheet tool.

    The single case only checks that the spreadsheet id from the user message is
    passed through to the tool call.
    """
    spreadsheet_id = "1L2ovCUcRNOacoWxtLV3jgaidWZq4Bw_WXbIWJcxobN0"

    suite = EvalSuite(
        name="Google Sheets Tools Evaluation",
        system_message="You are an AI assistant that can manage Google Sheets using the provided tools.",
        catalog=catalog,
        rubric=rubric,
    )

    suite.add_case(
        name="Get a spreadsheet",
        user_message=f"Get the data in the second sheet of the spreadsheet with the following id {spreadsheet_id}",
        expected_tool_calls=[
            ExpectedToolCall(
                func=get_spreadsheet,
                args={
                    "spreadsheet_id": spreadsheet_id,
                },
            )
        ],
        critics=[
            BinaryCritic(critic_field="spreadsheet_id", weight=1.0),
        ],
    )

    return suite
Green","B":30,"C":"nateg@example.com","D":88,"E":"Male","F":"Orlando","G":"USA","H":"2024-02-01"},"43":{"A":100,"B":300,"C":234,"D":399,"E":5039,"F":2345,"G":23526,"H":123,"I":54,"J":234,"K":54,"L":23,"M":12,"N":57,"O":1324},"47":{"A":456,"B":234,"C":234,"D":399,"E":234,"F":1234,"G":23526,"H":123,"I":54,"J":234,"K":4567,"L":23,"M":12,"N":234,"O":1324}}' + expected_data = { + 1: { + "A": "name", + "B": "age", + "C": "email", + "D": "score", + "E": "gender", + "F": "city", + "G": "country", + "H": "registration_date", + }, + 34: { + "A": "Isla Green", + "B": 24, + "C": "islag@example.com", + "D": 79, + "E": "Female", + "F": "Chicago", + "G": "USA", + "H": "2024-01-10", + }, + 38: { + "A": "Mia Black", + "B": 27, + "C": "miab@example.com", + "D": 80, + "E": "Female", + "F": "Denver", + "G": "USA", + "H": "2024-01-30", + }, + 39: { + "A": "Nate Green", + "B": 30, + "C": "nateg@example.com", + "D": 88, + "E": "Male", + "F": "Orlando", + "G": "USA", + "H": "2024-02-01", + }, + 43: { + "A": 100, + "B": 300, + "C": 234, + "D": 399, + "E": 5039, + "F": 2345, + "G": 23526, + "H": 123, + "I": 54, + "J": 234, + "K": 54, + "L": 23, + "M": 12, + "N": 57, + "O": 1324, + }, + 47: { + "A": 456, + "B": 234, + "C": 234, + "D": 399, + "E": 234, + "F": 1234, + "G": 23526, + "H": 123, + "I": 54, + "J": 234, + "K": 4567, + "L": 23, + "M": 12, + "N": 234, + "O": 1324, + }, + } + + sheet_input_data = SheetDataInput(data=data) + assert sheet_input_data.data == expected_data diff --git a/toolkits/google/tests/test_sheets_utils.py b/toolkits/google/tests/test_sheets_utils.py new file mode 100644 index 00000000..420f0554 --- /dev/null +++ b/toolkits/google/tests/test_sheets_utils.py @@ -0,0 +1,542 @@ +from unittest.mock import MagicMock, patch + +import pytest +from arcade.sdk.errors import RetryableToolError, ToolExecutionError + +from arcade_google.models import ( + CellData, + CellExtendedValue, + NumberFormatType, + RowData, + SheetDataInput, +) +from arcade_google.utils import ( + 
col_to_index, + compute_sheet_data_dimensions, + convert_api_grid_data_to_dict, + create_cell_data, + create_row_data, + create_sheet_data, + create_sheet_properties, + extract_user_entered_cell_value, + group_contiguous_rows, + index_to_col, + is_col_greater, + process_row, + validate_write_to_cell_params, +) + + +@pytest.fixture +def sheet_data_input_fixture(): + data = { + 1: { + "A": "name", + "B": "age", + "C": "email", + "D": "score", + "E": "gender", + "F": "city", + "G": "country", + "H": "registration_date", + }, + 2: { + "A": "John Doe", + "B": 28, + "C": "johndoe@example.com", + "D": 85.4, + "E": "Male", + "F": "New York", + "G": "USA", + "H": "2023-01-15", + }, + 10: { + "A": "Nate Green", + "B": 30, + "C": "nateg@example.com", + "D": 88, + "E": "Male", + "F": "Orlando", + "G": "USA", + "H": "2024-02-01", + }, + 43: { + "A": 100, + "B": 300, + "H": 123, + "I": "=SUM(SEQUENCE(10))", + }, + 44: { + "A": 456, + "B": 234, + "H": 123, + "I": "=SUM(SEQUENCE(10))", + }, + } + return SheetDataInput(data=data) + + +@pytest.mark.parametrize( + "col, expected_index", + [ + ("A", 0), + ("B", 1), + ("Z", 25), + ("AA", 26 + 0), + ("AZ", (1 * 26) + 25), + ("BA", (2 * 26) + 0), + ("ZZ", (26 * 26) + 25), + ("AAA", (1 * 26 * 26) + (1 * 26) + 0), + ("AAB", (1 * 26 * 26) + (1 * 26) + 1), + ("QED", (17 * 26 * 26) + (5 * 26) + 3), + ], +) +def test_col_to_index(col, expected_index): + assert col_to_index(col) == expected_index + + +@pytest.mark.parametrize( + "index, expected_col", + [ + (0, "A"), + (1, "B"), + (25, "Z"), + (26 + 0, "AA"), + ((1 * 26) + 25, "AZ"), + ((2 * 26) + 0, "BA"), + ((26 * 26) + 25, "ZZ"), + ((1 * 26 * 26) + (1 * 26) + 0, "AAA"), + ((1 * 26 * 26) + (1 * 26) + 1, "AAB"), + ((17 * 26 * 26) + (5 * 26) + 3, "QED"), + ], +) +def test_index_to_col(index, expected_col): + assert index_to_col(index) == expected_col + + +@pytest.mark.parametrize( + "col1, col2, expected_result", + [ + ("A", "B", False), + ("B", "A", True), + ("AA", "AB", False), + ("AB", "AA", 
True), + ("A", "AA", False), + ("AA", "A", True), + ("Z", "AA", False), + ("AA", "Z", True), + ("AAA", "AAB", False), + ("AAB", "AAA", True), + ("QED", "QEE", False), + ("QEE", "QED", True), + ], +) +def test_is_col_greater(col1, col2, expected_result): + assert is_col_greater(col1, col2) == expected_result + + +def test_compute_sheet_data_dimensions(sheet_data_input_fixture): + (min_row, max_row), (min_col_index, max_col_index) = compute_sheet_data_dimensions( + sheet_data_input_fixture + ) + + expected_min_row = 1 + expected_max_row = 44 + expected_min_col_index = 0 # Column "A" + expected_max_col_index = 8 # Column "I" + + assert min_row == expected_min_row + assert max_row == expected_max_row + assert min_col_index == expected_min_col_index + assert max_col_index == expected_max_col_index + + +def test_create_sheet_properties(): + sheet_properties = create_sheet_properties( + sheet_id=1, + title="Sheet1", + row_count=10000, + column_count=260, + ) + + assert sheet_properties.sheetId == 1 + assert sheet_properties.title == "Sheet1" + assert sheet_properties.gridProperties.rowCount == 10000 + assert sheet_properties.gridProperties.columnCount == 260 + + +@pytest.mark.parametrize( + "row_numbers, expected_groups", + [ + ([], []), + ([5, 6, 7], [[5, 6, 7]]), + ( + [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20], + [[1, 2, 3], [5, 6, 7, 8, 9, 10, 11], [18, 19, 20]], + ), + ], +) +def test_group_contiguous_rows(row_numbers, expected_groups): + grouped_rows = group_contiguous_rows(row_numbers) + assert grouped_rows == expected_groups + + +@pytest.mark.parametrize( + "input_value, expected_key, expected_value, expected_type, expected_pattern", + [ + (1234, "numberValue", 1234, NumberFormatType.NUMBER, "#,##0"), + (1.234, "numberValue", 1.234, NumberFormatType.NUMBER, "#,##0.00"), + ("$100", "numberValue", 100, NumberFormatType.CURRENCY, "$#,##0"), + ("$100.50", "numberValue", 100.50, NumberFormatType.CURRENCY, "$#,##0.00"), + ("75%", "numberValue", 75.00, 
def _assert_user_entered_values_match(actual_cells, expected_cells):
    """Assert both cell sequences have the same length and pairwise-equal userEnteredValue."""
    # zip() stops at the shorter sequence, so the length is checked explicitly to
    # make missing or extra cells fail the test.
    assert len(actual_cells) == len(expected_cells)
    for actual, expected in zip(actual_cells, expected_cells):
        assert actual.userEnteredValue == expected.userEnteredValue


def test_create_row_data():
    """A sparse row is expanded to one cell per column, with empty-string cells as filler."""
    sparse_row = {
        "A": 1,  # Column index 0
        "B": 2.5,  # Column index 1
        "AA": "test",  # Column index 26
        "BA": True,  # Column index 52
        "BB": "=SUM(A1:B1)",  # Column index 53
    }
    min_col_index = 0  # Column "A"
    max_col_index = 53  # Column "BB"

    filled_values = {
        0: CellExtendedValue(numberValue=1),
        1: CellExtendedValue(numberValue=2.5),
        26: CellExtendedValue(stringValue="test"),
        52: CellExtendedValue(boolValue=True),
        53: CellExtendedValue(formulaValue="=SUM(A1:B1)"),
    }
    expected_row_data = RowData(
        values=[
            CellData(
                userEnteredValue=filled_values.get(index, CellExtendedValue(stringValue=""))
            )
            for index in range(max_col_index + 1)
        ]
    )

    actual_row_data = create_row_data(sparse_row, min_col_index, max_col_index)

    _assert_user_entered_values_match(actual_row_data.values, expected_row_data.values)


def test_create_sheet_data():
    """Non-contiguous rows are split into one grid per run of contiguous rows."""
    test_data = {
        2: {"B": "row2B", "C": 200},
        3: {"B": "row3B"},
        5: {"A": "=SUM(A1:A1)", "C": "row5C"},
    }
    sheet_data_input = SheetDataInput(data=test_data)
    min_col_index = 0  # Column "A"
    max_col_index = 2  # Column "C"
    empty_cell = CellData(userEnteredValue=CellExtendedValue(stringValue=""))

    grid_data_list = create_sheet_data(sheet_data_input, min_col_index, max_col_index)

    assert len(grid_data_list) == 2, "Should have two groups of contiguous rows"

    # Rows 2-3 form the first group; startRow is zero-based.
    group1 = grid_data_list[0]
    assert group1.startRow == 1
    assert group1.startColumn == min_col_index
    assert len(group1.rowData) == 2
    _assert_user_entered_values_match(
        group1.rowData[0].values,
        [empty_cell, create_cell_data("row2B"), create_cell_data(200)],
    )
    _assert_user_entered_values_match(
        group1.rowData[1].values,
        [empty_cell, create_cell_data("row3B"), empty_cell],
    )

    # Row 5 stands alone in the second group.
    group2 = grid_data_list[1]
    assert group2.startRow == 4
    assert group2.startColumn == min_col_index
    assert len(group2.rowData) == 1
    _assert_user_entered_values_match(
        group2.rowData[0].values,
        [create_cell_data("=SUM(A1:A1)"), empty_cell, create_cell_data("row5C")],
    )
({"userEnteredValue": {"numberValue": 123}}, 123), + ({"userEnteredValue": {"boolValue": True}}, True), + ({"userEnteredValue": {"formulaValue": "=SUM(A1:A2)"}}, "=SUM(A1:A2)"), + ], +) +def test_extract_user_entered_cell_value(cell, expected): + result = extract_user_entered_cell_value(cell) + assert result == expected + + +def test_process_row_empty(): + row = {} + assert process_row(row, 0) == {} + + +def test_process_row_non_empty(): + row = { + "values": [ + {"userEnteredValue": {"stringValue": "cell1"}, "formattedValue": "cell1"}, + {"userEnteredValue": {}}, # should be ignored + {"userEnteredValue": {"formulaValue": "=C1+D4"}, "formattedValue": 42}, + {"userEnteredValue": {"stringValue": ""}, "formattedValue": ""}, # should be ignored + {"userEnteredValue": {"boolValue": False}, "formattedValue": False}, + ] + } + expected = { + "A": {"userEnteredValue": "cell1", "formattedValue": "cell1"}, + "C": {"userEnteredValue": "=C1+D4", "formattedValue": 42}, + "E": {"userEnteredValue": False, "formattedValue": False}, + } + + assert process_row(row, 0) == expected + + +def test_process_row_with_start_index(): + row = { + "values": [ + {"userEnteredValue": {"stringValue": "x"}, "formattedValue": "x"}, + {"userEnteredValue": {"formulaValue": "=C1+D4"}, "formattedValue": "$10.00"}, + ] + } + expected = { + "C": {"userEnteredValue": "x", "formattedValue": "x"}, + "D": {"userEnteredValue": "=C1+D4", "formattedValue": "$10.00"}, + } + + assert process_row(row, 2) == expected + + +def test_convert_api_grid_data_to_dict_single_grid(): + data = [ + { + "startRow": 0, + "startColumn": 0, + "rowData": [ + { + "values": [ + {"userEnteredValue": {"stringValue": "A1"}, "formattedValue": "A1"}, + {"userEnteredValue": {"numberValue": 1}, "formattedValue": 1}, + ] + }, + { + "values": [ + {"userEnteredValue": {"stringValue": "A2"}, "formattedValue": "A2"}, + {"userEnteredValue": {"numberValue": 2}, "formattedValue": 2}, + ] + }, + { + "values": [ + {"userEnteredValue": {}}, + { + 
"userEnteredValue": {"stringValue": "ignored"}, + "formattedValue": "ignored", + }, + {"userEnteredValue": {"numberValue": 3}, "formattedValue": 3}, + ] + }, + ], + } + ] + result = convert_api_grid_data_to_dict(data) + expected = { + 1: { + "A": {"userEnteredValue": "A1", "formattedValue": "A1"}, + "B": {"userEnteredValue": 1, "formattedValue": 1}, + }, + 2: { + "A": {"userEnteredValue": "A2", "formattedValue": "A2"}, + "B": {"userEnteredValue": 2, "formattedValue": 2}, + }, + 3: { + "B": {"userEnteredValue": "ignored", "formattedValue": "ignored"}, + "C": {"userEnteredValue": 3, "formattedValue": 3}, + }, + } + + assert result == expected + + +def test_convert_api_grid_data_to_dict_multiple_grids(): + data = [ + { + "startRow": 5, + "startColumn": 1, + "rowData": [ + { + "values": [ + {"userEnteredValue": {"numberValue": 100}, "formattedValue": 100}, + {"userEnteredValue": {"stringValue": "=SUM(A1:A2)"}, "formattedValue": 23}, + ] + } + ], + }, + { + "startRow": 0, + "startColumn": 0, + "rowData": [ + { + "values": [ + {"userEnteredValue": {"stringValue": "First"}, "formattedValue": "First"}, + {"userEnteredValue": {"numberValue": 10}, "formattedValue": 10}, + ] + } + ], + }, + ] + result = convert_api_grid_data_to_dict(data) + expected = { + 1: { + "A": {"userEnteredValue": "First", "formattedValue": "First"}, + "B": {"userEnteredValue": 10, "formattedValue": 10}, + }, + 6: { + "B": {"userEnteredValue": 100, "formattedValue": 100}, + "C": {"userEnteredValue": "=SUM(A1:A2)", "formattedValue": 23}, + }, + } + + assert result == expected + + +def test_convert_api_grid_data_to_dict_empty_rows(): + data = [ + { + "startRow": 10, + "startColumn": 0, + "rowData": [ + {"values": [{"userEnteredValue": {}, "formattedValue": ""}]}, + {"values": []}, + ], + } + ] + result = convert_api_grid_data_to_dict(data) + expected = {} + + assert result == expected + + +FAKE_SHEET_RESPONSE = { + "sheets": [ + {"properties": {"title": "Sheet1", "gridProperties": {"rowCount": 10, 
"columnCount": 6}}} + ] +} + + +@patch("arcade_google.utils.build_sheets_service") +def test_validate_write_to_cell_params_valid(mock_build): + mock_service = MagicMock() + mock_service.spreadsheets().get().execute.return_value = FAKE_SHEET_RESPONSE + mock_build.return_value = mock_service + + service = mock_build("dummy_token") + + validate_write_to_cell_params( + service=service, + spreadsheet_id="dummy_id", + sheet_name="Sheet1", + column="B", + row=5, + ) + + +@patch("arcade_google.utils.build_sheets_service") +def test_validate_write_to_cell_params_invalid_sheet_name(mock_build): + mock_service = MagicMock() + mock_service.spreadsheets().get().execute.return_value = FAKE_SHEET_RESPONSE + mock_build.return_value = mock_service + + service = mock_build("dummy_token") + + with pytest.raises(RetryableToolError) as excinfo: + validate_write_to_cell_params( + service=service, + spreadsheet_id="dummy_id", + sheet_name="NonExistentSheet", + column="A", + row=5, + ) + assert "Sheet name NonExistentSheet not found" in str(excinfo.value) + + +@patch("arcade_google.utils.build_sheets_service") +def test_validate_write_to_cell_params_row_out_of_bounds(mock_build): + mock_service = MagicMock() + mock_service.spreadsheets().get().execute.return_value = FAKE_SHEET_RESPONSE + mock_build.return_value = mock_service + + service = mock_build("dummy_token") + + out_of_bounds_row = 15 + with pytest.raises(ToolExecutionError) as excinfo: + validate_write_to_cell_params( + service=service, + spreadsheet_id="dummy_id", + sheet_name="Sheet1", + column="A", + row=out_of_bounds_row, + ) + assert f"Row {out_of_bounds_row} is out of bounds" in str(excinfo.value) + + +@patch("arcade_google.utils.build_sheets_service") +def test_validate_write_to_cell_params_column_out_of_bounds(mock_build): + mock_service = MagicMock() + mock_service.spreadsheets().get().execute.return_value = FAKE_SHEET_RESPONSE + mock_build.return_value = mock_service + + service = mock_build("dummy_token") + + 
out_of_bounds_column = "Z" + with pytest.raises(ToolExecutionError) as excinfo: + validate_write_to_cell_params( + service=service, + spreadsheet_id="dummy_id", + sheet_name="Sheet1", + column=out_of_bounds_column, + row=5, + ) + assert f"Column {out_of_bounds_column} is out of bounds" in str(excinfo.value)