Notion Toolkit (#283)

This commit is contained in:
Eric Gustin 2025-03-15 11:08:10 -08:00 committed by GitHub
parent 99ff11d30e
commit 7428397318
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 2385 additions and 0 deletions

View file

@ -0,0 +1,18 @@
files: ^./
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: "v4.4.0"
hooks:
- id: check-case-conflict
- id: check-merge-conflict
- id: check-toml
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.7
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

View file

@ -0,0 +1,44 @@
target-version = "py39"
line-length = 100
fix = true
[lint]
select = [
# flake8-2020
"YTT",
# flake8-bandit
"S",
# flake8-bugbear
"B",
# flake8-builtins
"A",
# flake8-comprehensions
"C4",
# flake8-debugger
"T10",
# flake8-simplify
"SIM",
# isort
"I",
# mccabe
"C90",
# pycodestyle
"E", "W",
# pyflakes
"F",
# pygrep-hooks
"PGH",
# pyupgrade
"UP",
# ruff
"RUF",
# tryceratops
"TRY",
]
[lint.per-file-ignores]
"**/tests/*" = ["S101"]
[format]
preview = true
skip-magic-trailing-comma = false

21
toolkits/notion/LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025, Arcade AI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

53
toolkits/notion/Makefile Normal file
View file

@ -0,0 +1,53 @@
.PHONY: help
help:
@echo "🛠️ notion Commands:\n"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: install
# NOTE: recipes run under /bin/sh, so the Poetry probe uses the portable
# `> /dev/null 2>&1` redirection. The bash-only `&>` form is parsed by POSIX shells
# as "run in background, then redirect nothing", which makes the check always succeed.
install: ## Install the poetry environment and install the pre-commit hooks
	@echo "📦 Checking if Poetry is installed"
	@if ! command -v poetry > /dev/null 2>&1; then \
		echo "📦 Installing Poetry with pip"; \
		pip install poetry==1.8.5; \
	else \
		echo "📦 Poetry is already installed"; \
	fi
	@echo "🚀 Installing package in development mode with all extras"
	poetry install --all-extras
.PHONY: build
build: clean-build ## Build wheel file using poetry
@echo "🚀 Creating wheel file"
poetry build
.PHONY: clean-build
clean-build: ## clean build artifacts
@echo "🗑️ Cleaning dist directory"
rm -rf dist
.PHONY: test
test: ## Test the code with pytest
@echo "🚀 Testing code: Running pytest"
@poetry run pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml
.PHONY: coverage
coverage: ## Generate coverage report
@echo "coverage report"
coverage report
@echo "Generating coverage report"
coverage html
.PHONY: bump-version
bump-version: ## Bump the version in the pyproject.toml file
@echo "🚀 Bumping version in pyproject.toml"
poetry version patch
.PHONY: check
check: ## Run code quality tools.
@echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check"
@poetry check
@echo "🚀 Linting code: Running pre-commit"
@poetry run pre-commit run -a
@echo "🚀 Static type checking: Running mypy"
@poetry run mypy --config-file=pyproject.toml

View file

@ -0,0 +1,201 @@
import asyncio
from typing import Any, Optional
from arcade.sdk import ToolContext
from arcade_notion.enums import BlockType
from arcade_notion.utils import get_page_url
class BlockToMarkdownConverter:
    """
    A converter class that transforms Notion blocks into Markdown.

    The class registers conversion handlers for different Notion block types.
    If a block type does not have a handler, then the block's plain text is returned.
    """

    def __init__(self, context: ToolContext):
        """Store the tool context and register the per-block-type conversion handlers.

        Args:
            context (ToolContext): The tool context, forwarded to helpers that need to
                call the Notion API (currently only child page conversion).
        """
        self.context = context

        # block types whose conversion logic has been implemented
        # TODO: implement conversion logic for more block types
        self.handlers = {
            BlockType.BULLETED_LIST_ITEM.value: self._convert_bulleted_list_item,
            BlockType.EQUATION.value: self._convert_equation,
            BlockType.HEADING_1.value: self._convert_heading_1,
            BlockType.HEADING_2.value: self._convert_heading_2,
            BlockType.HEADING_3.value: self._convert_heading_3,
            BlockType.LINK_PREVIEW.value: self._convert_link_preview,
            BlockType.NUMBERED_LIST_ITEM.value: self._convert_numbered_list_item,
            BlockType.PARAGRAPH.value: self._convert_paragraph,
        }

    async def convert_block(self, block: dict[str, Any]) -> str:
        """
        Convert a single Notion block to a Markdown string

        Args:
            block (dict[str, Any]): A Notion block.

        Returns:
            str: A Markdown string.
        """
        block_type = block.get("type")
        if block_type in self.handlers:
            converter = self.handlers[block_type]
            # Handlers may be sync or async; only await the async ones.
            if asyncio.iscoroutinefunction(converter):
                md: str = await converter(block)
                return md
            else:
                return converter(block)
        elif block_type == BlockType.CHILD_PAGE.value:
            return await self._convert_child_page(block)
        else:
            # Unsupported block type: fall back to its unformatted text.
            return self._get_plaintext(block)

    @staticmethod
    def rich_text_to_markdown(rich_text_items: list[dict[str, Any]]) -> str:
        """
        Convert a list of rich text items (from a Notion block) into Markdown.

        Handles formatting such as bold, italic, strikethrough, underline (via HTML),
        inline code, text coloring, hyperlinks, and equations.

        Whitespace-only items are emitted verbatim (without any markers) so that the
        spacing between two adjacent formatted runs is preserved; empty items are skipped.

        Args:
            rich_text_items (list[dict[str, Any]]): Notion rich text objects.

        Returns:
            str: The concatenated Markdown for all items.
        """
        pieces: list[str] = []
        for item in rich_text_items:
            annotations = item.get("annotations", {})
            type_val = item.get("type", "text")

            # Special handling for inline equations.
            if type_val == "equation":
                expression = item.get("equation", {}).get("expression", "")
                pieces.append(f"${expression}$")
                continue

            if type_val == "text":
                text_obj = item.get("text", {})
                text = text_obj.get("content", "")
                link_obj = text_obj.get("link")
                link = (
                    link_obj.get("url")
                    if (link_obj and isinstance(link_obj, dict))
                    else item.get("href")
                )
            else:
                # Mentions and any other rich text type expose their text via `plain_text`.
                text = item.get("plain_text", "")
                link = item.get("href")

            if not text:
                continue

            if not text.strip():
                # Wrapping bare whitespace in markers (e.g. "** **") is not valid emphasis,
                # but dropping it would glue the neighbouring runs together.
                pieces.append(text)
                continue

            # Apply annotation formatting.
            pieces.append(BlockToMarkdownConverter.apply_formatting(text, annotations, link))

        return "".join(pieces)

    @staticmethod
    def apply_formatting(text: str, annotations: dict[str, Any], link: Optional[str] = None) -> str:
        """Apply formatting to a text string based on the annotations.

        Used when converting rich text to markdown

        Args:
            text (str): The text to format.
            annotations (dict[str, Any]): The annotations to apply to the text.
            link (Optional[str]): An optional link for a hyperlink.

        Returns:
            str: The formatted text.
        """
        if annotations.get("code"):
            # Inline code: wrap in backticks and skip the other inline styles, since
            # Markdown does not interpret markers inside a code span.
            text = f"`{text}`"
        else:
            # Add underline
            if annotations.get("underline"):
                text = f"<u>{text}</u>"

            # Apply color
            color = annotations.get("color", "default")
            if color != "default":
                text = f'<span style="color: {color};">{text}</span>'

            # Add bold, italic, and strikethrough
            markers = [
                marker
                for key, marker in (("bold", "**"), ("italic", "*"), ("strikethrough", "~~"))
                if annotations.get(key)
            ]
            if markers:
                # Close the markers in reverse order so they nest properly.
                text = "".join(markers) + text + "".join(reversed(markers))

        # Add hyperlink (also valid around a code span, so it is applied in both cases)
        if link:
            text = f"[{text}]({link})"

        return text

    def _get_plaintext(self, block: dict[str, Any]) -> str:
        """
        Extract and return the plain text from a Notion block.

        This acts as a fallback for unsupported block types.
        """
        block_type: str = block.get("type", "")
        content = block.get(block_type, {})
        if isinstance(content, dict):
            rich_text_items = content.get("rich_text", [])
            return "".join(item.get("plain_text", "") for item in rich_text_items)
        return ""

    def _convert_text_block(self, block: dict[str, Any], element_key: str, prefix: str = "") -> str:
        """
        Helper method to convert a Notion block's rich_text element into a Markdown string.

        Optionally, a prefix (like a markdown list marker or heading hashes) is added.
        """
        element = block.get(element_key, {})
        rich_text_items = element.get("rich_text", [])
        text = self.rich_text_to_markdown(rich_text_items)
        return f"{prefix}{text} \n"

    async def _convert_child_page(self, block: dict[str, Any]) -> str:
        """
        Asynchronously convert a child page block. This requires fetching the page's URL.
        """
        page_url = await get_page_url(self.context, block.get("id", ""))
        child_page = block.get("child_page", {})
        rich_text_items = child_page.get("rich_text", [])
        if rich_text_items:
            title = self.rich_text_to_markdown(rich_text_items)
        else:
            title = child_page.get("title", "")
        return f"[{title}]({page_url}) \n"

    def _convert_bulleted_list_item(self, block: dict[str, Any]) -> str:
        """Convert a bulleted list item block into a `- ` Markdown list entry."""
        return self._convert_text_block(block, "bulleted_list_item", "- ")

    def _convert_equation(self, block: dict[str, Any]) -> str:
        """Convert a block-level equation into a `$$ ... $$` Markdown expression."""
        expression = block.get("equation", {}).get("expression", "")
        return f"$$ {expression} $$ \n"

    def _convert_heading_1(self, block: dict[str, Any]) -> str:
        """Convert a heading_1 block into a `# ` Markdown heading."""
        return self._convert_text_block(block, "heading_1", "# ")

    def _convert_heading_2(self, block: dict[str, Any]) -> str:
        """Convert a heading_2 block into a `## ` Markdown heading."""
        return self._convert_text_block(block, "heading_2", "## ")

    def _convert_heading_3(self, block: dict[str, Any]) -> str:
        """Convert a heading_3 block into a `### ` Markdown heading."""
        return self._convert_text_block(block, "heading_3", "### ")

    def _convert_link_preview(self, block: dict[str, Any]) -> str:
        """Convert a link preview block into a Markdown link to the previewed URL.

        Link preview blocks carry their target in a `url` field rather than in
        `rich_text`, so the URL is used both as the link text and the link target.
        If no URL is present, fall back to converting any `rich_text` content.
        """
        url = block.get("link_preview", {}).get("url", "")
        if url:
            return f"[{url}]({url}) \n"
        return self._convert_text_block(block, "link_preview")

    def _convert_numbered_list_item(self, block: dict[str, Any]) -> str:
        """Convert a numbered list item block into a `1. ` Markdown list entry.

        Every item uses the literal marker `1. `; Markdown renderers renumber the list.
        """
        return self._convert_text_block(block, "numbered_list_item", "1. ")

    def _convert_paragraph(self, block: dict[str, Any]) -> str:
        """Convert a paragraph block into a line of Markdown text."""
        return self._convert_text_block(block, "paragraph")

View file

@ -0,0 +1,17 @@
# Base URL of the Notion REST API (v1).
# NOTE(review): presumably prefixed to the ENDPOINTS paths by `get_url` — confirm in utils.
NOTION_API_URL = "https://api.notion.com/v1"

# Endpoint path templates keyed by operation name. The `{...}` placeholders are supplied
# by callers as keyword arguments, e.g. `get_url("retrieve_block_children", block_id=...)`.
# "retrieve_block_children" and "append_block_children" intentionally share one path.
ENDPOINTS = {
    "create_a_page": "/pages",
    "retrieve_block_children": "/blocks/{block_id}/children",
    "search_by_title": "/search",
    "query_a_database": "/databases/{database_id}/query",
    "update_page_properties": "/pages/{page_id}",
    "append_block_children": "/blocks/{block_id}/children",
    "retrieve_a_database": "/databases/{database_id}",
    "create_comment": "/comments",
    "retrieve_a_page": "/pages/{page_id}",
    "retrieve_a_block": "/blocks/{block_id}",
}

# NOTE(review): presumably the title substituted for untitled pages — confirm against callers.
UNTITLED_TITLE = "New Page"

View file

@ -0,0 +1,45 @@
from enum import Enum
class SortDirection(str, Enum):
    """Sort order for search results; sent as the `direction` of the search `sort` payload."""

    ASCENDING = "ascending"
    DESCENDING = "descending"
class ObjectType(str, Enum):
    """Kind of Notion object; used to filter search results and match the `object` field."""

    PAGE = "page"
    DATABASE = "database"
class BlockType(str, Enum):
    """Values of a Notion block's `type` field.

    The member values are compared against `block["type"]` when converting blocks
    to Markdown; only a subset currently has dedicated conversion logic.
    """

    BOOKMARK = "bookmark"
    BREADCRUMB = "breadcrumb"
    BULLETED_LIST_ITEM = "bulleted_list_item"
    CALLOUT = "callout"
    CHILD_DATABASE = "child_database"
    CHILD_PAGE = "child_page"
    COLUMN = "column"
    COLUMN_LIST = "column_list"
    DIVIDER = "divider"
    EMBED = "embed"
    EQUATION = "equation"
    FILE = "file"
    HEADING_1 = "heading_1"
    HEADING_2 = "heading_2"
    HEADING_3 = "heading_3"
    IMAGE = "image"
    LINK_PREVIEW = "link_preview"
    LINK_TO_PAGE = "link_to_page"
    NUMBERED_LIST_ITEM = "numbered_list_item"
    PARAGRAPH = "paragraph"
    PDF = "pdf"
    QUOTE = "quote"
    SYNCED_BLOCK = "synced_block"
    TABLE = "table"
    TABLE_OF_CONTENTS = "table_of_contents"
    TABLE_ROW = "table_row"
    TEMPLATE = "template"
    TO_DO = "to_do"
    TOGGLE = "toggle"
    UNSUPPORTED = "unsupported"
    VIDEO = "video"

View file

@ -0,0 +1,158 @@
import re
from typing import Any
# TODO: This is a partial implementation.
# TODO: Does not support children blocks. Instead, the markdown content is flattened.
# TODO: Does not support equation blocks.
# TODO: Does not support colored text styling.
# TODO: Does not support underline text styling.
# TODO: Does not support multiple text styles for the same block.
def _make_code_block(lines: list[str], language: str) -> dict[str, Any]:
    """Build a Notion `code` block from the raw lines collected between two fences."""
    return {
        "type": "code",
        "code": {
            "rich_text": [
                {
                    "type": "text",
                    "text": {"content": "\n".join(lines)},
                }
            ],
            "language": language,
        },
    }


def convert_markdown_to_blocks(content: str) -> list[dict[str, Any]]:  # noqa: C901
    """Convert markdown content to Notion blocks.

    The markdown is processed line by line and flattened: headings (`#`, `##`, `###`),
    numbered and bulleted list items, quotes, dividers (`---`), fenced code blocks and
    paragraphs each become one Notion block. Blank lines are skipped.

    Lines inside a fenced code block are kept verbatim (including their indentation),
    and a fence that is never closed still yields a code block with the collected lines.

    Args:
        content (str): The markdown text to convert.

    Returns:
        list[dict[str, Any]]: Notion block objects, in document order.
    """
    blocks: list[dict[str, Any]] = []
    code_lines: list[str] = []
    in_code: bool = False
    language: str = "plain text"

    for raw_line in content.splitlines():
        line = raw_line.strip()

        if line.startswith("```"):
            if in_code:
                # Closing fence: flush the collected code.
                blocks.append(_make_code_block(code_lines, language))
                code_lines = []
                in_code = False
            else:
                # Opening fence: anything after the backticks is the language hint.
                in_code = True
                language = line[3:].strip() or "plain text"
            continue

        if in_code:
            # Keep the untouched line so that code indentation survives.
            code_lines.append(raw_line)
            continue

        if not line:
            continue

        if line.startswith("### "):
            block_type, text = "heading_3", line[4:]
        elif line.startswith("## "):
            block_type, text = "heading_2", line[3:]
        elif line.startswith("# "):
            block_type, text = "heading_1", line[2:]
        elif numbered_match := re.match(r"(\d+)\.\s+(.+)", line):
            block_type, text = "numbered_list_item", numbered_match.group(2)
        elif line.startswith("- "):
            block_type, text = "bulleted_list_item", line[2:]
        elif line.startswith("> "):
            block_type, text = "quote", line[2:]
        elif line == "---":
            blocks.append({"type": "divider", "divider": {}})
            continue
        else:
            block_type, text = "paragraph", line

        blocks.append({"type": block_type, block_type: {"rich_text": format_text(text)}})

    # An unterminated fence must not silently swallow the code that followed it.
    if in_code and code_lines:
        blocks.append(_make_code_block(code_lines, language))

    return blocks
# Inline markdown constructs, tried in this order at every position of the text.
# Underscore emphasis is only recognised when the underscores are not glued to a word
# character, so identifiers such as `snake_case_name` are left untouched.
_INLINE_MARKDOWN_RE = re.compile(
    r"\[(?P<link_text>[^\]]+)\]\((?P<link_url>[^\)]+)\)"  # [text](url)
    r"|\*\*(?P<bold_star>.*?)\*\*"  # **bold**
    r"|(?<!\w)__(?P<bold_under>.*?)__(?!\w)"  # __bold__
    r"|\*(?P<italic_star>.*?)\*"  # *italic*
    r"|(?<!\w)_(?P<italic_under>.*?)_(?!\w)"  # _italic_
    r"|~~(?P<strike>.*?)~~"  # ~~strikethrough~~
    r"|`(?P<code>.*?)`"  # `code`
)

# Maps the regex group that captured the styled text to the Notion annotation to enable.
_STYLE_BY_GROUP = {
    "bold_star": "bold",
    "bold_under": "bold",
    "italic_star": "italic",
    "italic_under": "italic",
    "strike": "strikethrough",
    "code": "code",
}


def _rich_text_item(
    content: str, style: Optional[str] = None, url: Optional[str] = None
) -> dict[str, Any]:
    """Build one Notion rich text object with at most one style and an optional link."""
    text: dict[str, Any] = {"content": content}
    if url is not None:
        text["link"] = {"url": url}
    return {
        "type": "text",
        "text": text,
        "annotations": {
            "bold": style == "bold",
            "italic": style == "italic",
            "strikethrough": style == "strikethrough",
            "underline": False,
            "code": style == "code",
            "color": "default",
        },
    }


def format_text(text: str) -> list[dict[str, Any]]:
    """Convert text with markdown formatting to Notion rich text.

    Recognises links, bold, italic, strikethrough and inline code. Each recognised span
    becomes its own rich text object carrying a single style; the text between spans is
    emitted as unstyled rich text objects. Nested or combined styles are not supported.

    Args:
        text (str): A single line of markdown text.

    Returns:
        list[dict[str, Any]]: Notion rich text objects, in reading order. Empty input
        yields an empty list.
    """
    rich_text: list[dict[str, Any]] = []
    last_index = 0

    for match in _INLINE_MARKDOWN_RE.finditer(text):
        start, end = match.span()
        if start > last_index:
            # Plain text between the previous styled span and this one.
            rich_text.append(_rich_text_item(text[last_index:start]))

        # `lastgroup` names the capture group of the alternative that matched.
        group = match.lastgroup
        if group == "link_url":
            rich_text.append(
                _rich_text_item(match.group("link_text"), url=match.group("link_url"))
            )
        else:
            rich_text.append(_rich_text_item(match.group(group), style=_STYLE_BY_GROUP[group]))

        last_index = end

    if last_index < len(text):
        rich_text.append(_rich_text_item(text[last_index:]))

    return rich_text

View file

@ -0,0 +1,11 @@
from arcade_notion.tools.pages import create_page, get_page_content_by_id, get_page_content_by_title
from arcade_notion.tools.search import get_object_metadata, get_workspace_structure, search_by_title
__all__ = [
"create_page",
"get_object_metadata",
"get_page_content_by_id",
"get_page_content_by_title",
"search_by_title",
"get_workspace_structure",
]

View file

@ -0,0 +1,133 @@
from typing import Annotated, Any, Optional
import httpx
from arcade.sdk import ToolContext, tool
from arcade.sdk.auth import Notion
from arcade.sdk.errors import ToolExecutionError
from arcade_notion.block_to_markdown_converter import BlockToMarkdownConverter
from arcade_notion.enums import BlockType, ObjectType
from arcade_notion.markdown_to_block_converter import convert_markdown_to_blocks
from arcade_notion.tools.search import get_object_metadata
from arcade_notion.types import DatabaseParent, PageWithPageParentProperties, create_parent
from arcade_notion.utils import (
extract_title,
get_headers,
get_next_page,
get_url,
)
@tool(requires_auth=Notion())
async def get_page_content_by_id(
    context: ToolContext, page_id: Annotated[str, "ID of the page to get content from"]
) -> Annotated[str, "The markdown content of the page"]:
    """Get the content of a Notion page as markdown with the page's ID"""
    request_headers = get_headers(context)
    query_params = {"page_size": 100}
    block_converter = BlockToMarkdownConverter(context)

    async with httpx.AsyncClient() as client:

        async def fetch_markdown_recursive(block_id: str, indent: str = "") -> str:
            """
            Render the children of `block_id` as markdown.

            Walks the block tree depth-first, following pagination at every level.
            Each rendered line is prefixed with `indent`, which grows by one step per
            nesting level so the output mirrors the page's indentation.
            """
            children_url = get_url("retrieve_block_children", block_id=block_id)
            pieces: list[str] = []
            next_cursor = None
            more_pages = True

            while more_pages:
                page, more_pages, next_cursor = await get_next_page(
                    client, children_url, request_headers, query_params, next_cursor
                )
                for child in page.get("results", []):
                    rendered = await block_converter.convert_block(child)
                    if rendered:
                        # One indented piece per rendered line.
                        pieces.extend(
                            f"{indent}{line}\n" for line in rendered.rstrip("\n").splitlines()
                        )

                    # Child pages are linked, not expanded: descending into them would
                    # pull in the sub-page's content, which the Notion UI does not show.
                    is_child_page = child.get("type") == BlockType.CHILD_PAGE.value
                    if is_child_page or not child.get("has_children", False):
                        continue
                    pieces.append(await fetch_markdown_recursive(child["id"], indent + " "))

            return "".join(pieces)

        # The page title becomes the top-level heading.
        metadata = await get_object_metadata(context, object_id=page_id)
        heading = f"# {extract_title(metadata)}\n"

        # The page body follows the heading.
        body = await fetch_markdown_recursive(page_id, "")
        return heading + body
@tool(requires_auth=Notion())
async def get_page_content_by_title(
    context: ToolContext, title: Annotated[str, "Title of the page to get content from"]
) -> Annotated[str, "The markdown content of the page"]:
    """Get the content of a Notion page as markdown with the page's title"""
    # Resolve the title to a page first; only pages (not databases) are considered.
    metadata = await get_object_metadata(context, object_title=title, object_type=ObjectType.PAGE)
    resolved_id = metadata["id"]

    # The typed local keeps the declared `str` return type explicit for the type checker.
    markdown: str = await get_page_content_by_id(context, resolved_id)
    return markdown
@tool(requires_auth=Notion())
async def create_page(
    context: ToolContext,
    parent_title: Annotated[
        str,
        "Title of an existing page/database within which the new page will be created. ",
    ],
    title: Annotated[str, "Title of the new page"],
    content: Annotated[Optional[str], "The content of the new page"] = None,
) -> Annotated[str, "The ID of the new page"]:
    """Create a new Notion page by the title of the new page's parent."""
    # Notion API does not support creating a page at the root of the workspace... sigh
    parent_metadata = await get_object_metadata(
        context,
        parent_title,
        object_type=ObjectType.PAGE,
    )
    # The metadata's "object" field ("page"/"database") determines the parent reference type.
    parent_type = parent_metadata["object"] + "_id"
    parent = create_parent({"type": parent_type, parent_type: parent_metadata["id"]})

    properties: dict[str, Any] = {}
    if isinstance(parent, DatabaseParent):
        # TODO: Support creating a page within a database
        raise ToolExecutionError(
            message="Creating a page within a database is not supported.",
            developer_message="Database is not supported as a parent of a new page at this time.",
        )
    else:
        properties = PageWithPageParentProperties(title=title).to_dict()

    body: dict[str, Any] = {
        "parent": parent.to_dict(),
        "properties": properties,
    }
    # `children` is optional; leave the key out entirely when there is no content
    # rather than sending an explicit JSON null.
    if content:
        body["children"] = convert_markdown_to_blocks(content)

    url = get_url("create_a_page")
    headers = get_headers(context)
    async with httpx.AsyncClient() as client:
        response = await client.post(url, headers=headers, json=body)
        # Raises httpx.HTTPStatusError on any 4xx/5xx response.
        response.raise_for_status()
        return f"Successfully created page with ID: {response.json()['id']}"

View file

@ -0,0 +1,384 @@
from typing import Annotated, Any, Optional
import httpx
from arcade.sdk import ToolContext, tool
from arcade.sdk.auth import Notion
from arcade.sdk.errors import ToolExecutionError
from arcade_notion.enums import ObjectType, SortDirection
from arcade_notion.utils import (
build_workspace_structure,
get_headers,
get_url,
remove_none_values,
simplify_search_result,
)
@tool(requires_auth=Notion())
async def search_by_title(
    context: ToolContext,
    query: Annotated[
        Optional[str],
        "A substring to search for within page and database titles. "
        "If not provided (default), all pages and/or databases are returned.",
    ] = None,
    select: Annotated[
        Optional[ObjectType],
        "Limit the results to either only pages or only databases. Defaults to both.",
    ] = None,
    order_by: Annotated[
        SortDirection,
        "The direction to sort search results by last edited time. Defaults to 'descending'.",
    ] = SortDirection.DESCENDING,
    limit: Annotated[
        int,
        "The maximum number of results to return. Defaults to 100. Set to -1 for no limit.",
    ] = 100,
) -> Annotated[
    dict,
    "A dictionary containing minimal information about the pages and/or databases that have "
    "titles that are the best match for the query. Does not include content or location.",
]:
    """Search for similar titles of pages, databases, or both within the user's workspace.
    Does not include content.
    """
    # A negative limit (documented as -1) means "no limit". It must never be used for
    # comparison or slicing: `len(results) >= -1` is always true and `results[:-1]`
    # would drop the last result and stop after the first page.
    unlimited = limit is None or limit < 0

    results: list[Any] = []
    current_cursor = None

    url = get_url("search_by_title")
    headers = get_headers(context)
    payload = {
        "query": query,
        # The page size is capped at 100 per request here.
        "page_size": 100 if unlimited else min(100, limit),
        "filter": {"property": "object", "value": select.value} if select else None,
        "sort": {"direction": order_by, "timestamp": "last_edited_time"},
    }
    payload = remove_none_values(payload)

    async with httpx.AsyncClient() as client:
        while True:
            if current_cursor:
                payload["start_cursor"] = current_cursor
            elif "start_cursor" in payload:
                del payload["start_cursor"]

            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            page_results = [simplify_search_result(item) for item in data.get("results", [])]
            results.extend(page_results)

            # If a limit is set and we've reached or exceeded it, truncate the results.
            if not unlimited and len(results) >= limit:
                results = results[:limit]
                break

            if not data.get("has_more", False):
                break

            current_cursor = data.get("next_cursor")

    return {"results": results}
@tool(requires_auth=Notion())
async def get_object_metadata(
    context: ToolContext,
    object_title: Annotated[
        Optional[str], "Title of the page or database whose metadata to get"
    ] = None,
    object_id: Annotated[Optional[str], "ID of the page or database whose metadata to get"] = None,
    object_type: Annotated[
        Optional[ObjectType],
        "The type of object to match title to. Only used if `object_title` is provided. "
        "Defaults to both",
    ] = None,
) -> Annotated[dict[str, Any], "The metadata of the object"]:
    """Get the metadata of a Notion object (page or database) from its title or ID.
    One of `object_title` or `object_id` MUST be provided, but both cannot be provided.
    The title is case-insensitive and outer whitespace is ignored.
    An object's metadata includes it's id, various timestamps, properties, url, and more.
    """

    async def get_metadata_by_title(object_title: str) -> dict[str, Any]:
        """Search for the title and return the first candidate whose title matches exactly
        (ignoring case and surrounding whitespace)."""
        search_response = await search_by_title(
            context,
            object_title,
            select=object_type,
            order_by=SortDirection.DESCENDING,
            limit=3,
        )

        # When a type was requested, discard candidates of any other type.
        matches: list[dict[str, Any]] = search_response["results"]
        if object_type:
            matches = [found for found in matches if found["object"] == object_type.value]

        wanted_title = object_title.lower().strip()
        kind_label = object_type.value if object_type else "object"
        error_msg = (
            f"The {kind_label} with "
            f"the title '{object_title}' could not be found. "
            "Either it does not exist, or it has not been shared with the integration."
        )

        if not matches:
            raise ToolExecutionError(message=error_msg)

        # A match is either a page object: https://developers.notion.com/reference/page
        # or a database object: https://developers.notion.com/reference/database
        exact_match = next(
            (found for found in matches if found["title"].lower().strip() == wanted_title),
            None,
        )
        if exact_match is not None:
            return exact_match

        raise ToolExecutionError(
            message=error_msg,
            developer_message=f"The closest matches are: {matches}",
        )

    async def get_metadata_by_id(object_id: str) -> dict[str, Any]:
        """Retrieve the object through the page-retrieval endpoint."""
        url = get_url("retrieve_a_page", page_id=object_id)
        headers = get_headers(context)
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)
            if response.status_code == 200:
                return dict(response.json())
            raise ToolExecutionError(
                message="The page or database could not be found.",
                developer_message=f"The response was: {response.json()}",
            )

    # The ID takes precedence over the title when both are supplied.
    if object_id:
        return await get_metadata_by_id(object_id)
    if object_title:
        return await get_metadata_by_title(object_title)
    raise ToolExecutionError(
        message="Either object_title or object_id must be provided.",
    )
@tool(requires_auth=Notion())
async def get_workspace_structure(
    context: ToolContext,
) -> Annotated[dict[str, Any], "The workspace structure"]:
    """Get the workspace structure of the user's Notion workspace.
    Ideal for finding where an object is located in the workspace.
    """
    # Retrieve the complete flat list of all pages and databases.
    results = await search_by_title(context, None, limit=-1)

    # Remove database rows from results
    # They're returned from the search results because they're
    # technically child pages of the database, but since they're not displayed in the UI's
    # sidebar workspace structure, we do not include them in this tool's response.
    results["results"] = [
        item
        for item in results.get("results", [])
        if not (
            item.get("object", "") == "page"
            and item.get("parent", {}).get("type", "") == "database_id"
        )
    ]

    async with httpx.AsyncClient() as client:
        headers = get_headers(context)
        orphaned_ids: set[str] = set()
        for item in results.get("results", []):
            # This condition will only be met for databases that are 'child_pages' of a page.
            # Notion API wraps these databases in a block object, so we need to unwrap it to
            # link the parent page to the database. Sometimes it takes multiple unwrappings
            # to get to the parent page.
            while (
                item.get("parent", {}).get("type", "") == "block_id"
                and item.get("type", "database") == "database"
            ):
                block_id = item["parent"]["block_id"]
                url = get_url("retrieve_a_block", block_id=block_id)
                block_response = await client.get(url, headers=headers)
                if block_response.status_code != 200:
                    # unable to attach the database to the parent page
                    orphaned_ids.add(item["id"])
                    break

                block_parent = block_response.json().get("parent")
                if block_parent is None:
                    # Without a parent to climb to, the item's parent would never change
                    # and this loop would re-request the same block forever.
                    orphaned_ids.add(item["id"])
                    break
                item["parent"] = block_parent

    # Drop orphaned items from results since we were unable to attach them to a parent page.
    results["results"] = [
        item for item in results.get("results", []) if item["id"] not in orphaned_ids
    ]

    items = results.get("results", [])
    workspace_tree = build_workspace_structure(items)

    return workspace_tree
# {
# "workspace": [
# {
# "children": [
# {
# "id": "1b47a62b-04d4-800c-a6d4-e471ae371237",
# "title": "NextSteps",
# "type": "page",
# "url": "https://www.notion.so/Next-Steps-1b47a62b04d4800ca6d4e471ae371237",
# }
# ],
# "id": "1b37a62b-04d4-8079-a902-ce69ed7e7240",
# "title": "ArcadeNotes",
# "type": "page",
# "url": "https://www.notion.so/Arcade-Notes-1b37a62b04d48079a902ce69ed7e7240",
# },
# {
# "children": [
# {
# "id": "1b47a62b-04d4-8075-bf2d-c17daca86d2c",
# "title": "this is my subpage ofmy todos",
# "type": "page",
# "url": "https://www.notion.so/this-is-my-subpage-of-my-todos-1b47a62b04d48075bf2dc17daca86d2c",
# }
# ],
# "id": "1ad7a62b-04d4-8063-bbb9-dce59136e08d",
# "title": "Weekly To-doList",
# "type": "page",
# "url": "https://www.notion.so/Weekly-To-do-List-1ad7a62b04d48063bbb9dce59136e08d",
# },
# {
# "id": "1b27a62b-04d4-80d7-b372-e18300e71052",
# "title": "A pagehere!!",
# "type": "page",
# "url": "https://www.notion.so/A-page-here-1b27a62b04d480d7b372e18300e71052",
# },
# {
# "children": [
# {
# "children": [
# {
# "children": [
# {
# "children": [
# {
# "id": "1b37a62b-04d4-80b5-a7a2-f0c65cb3bf4d",
# "title": "Tooth",
# "type": "page",
# "url": "https://www.notion.so/Tooth-1b37a62b04d480b5a7a2f0c65cb3bf4d",
# }
# ],
# "id": "1b37a62b-04d4-80f8-b3b6-e5aef644b8ec",
# "title": "Teeth",
# "type": "page",
# "url": "https://www.notion.so/Teeth-1b37a62b04d480f8b3b6e5aef644b8ec",
# }
# ],
# "id": "1b37a62b-04d4-8096-94e2-ff9db2e5c2c5",
# "title": "Morning",
# "type": "page",
# "url": "https://www.notion.so/Morning-1b37a62b04d4809694e2ff9db2e5c2c5",
# }
# ],
# "id": "1ae7a62b-04d4-80ee-b291-fa69701d74d3",
# "title": "03/05/2025 - Wednesday, March5",
# "type": "page",
# "url": "https://www.notion.so/03-05-2025-Wednesday-March-5-1ae7a62b04d480eeb291fa69701d74d3",
# },
# {
# "id": "1b37a62b-04d4-80ee-b3da-d49d1ea043ac",
# "title": "how to call atool",
# "type": "page",
# "url": "https://www.notion.so/how-to-call-a-tool-1b37a62b04d480eeb3dad49d1ea043ac",
# },
# {
# "id": "1b37a62b-04d4-8103-a179-d27bef02c4b5",
# "title": "Atree",
# "type": "page",
# "url": "https://www.notion.so/A-tree-1b37a62b04d48103a179d27bef02c4b5",
# },
# {
# "id": "1b27a62b-04d4-8024-ae1a-db8d3deab4c8",
# "title": "adatabase",
# "type": "database",
# "url": "https://www.notion.so/1b27a62b04d48024ae1adb8d3deab4c8",
# },
# {
# "id": "1ae7a62b-04d4-8064-975b-fb27a6535eac",
# "title": "03/03/2025 - Tuesday, March3",
# "type": "page",
# "url": "https://www.notion.so/03-03-2025-Tuesday-March-3-1ae7a62b04d48064975bfb27a6535eac",
# },
# {
# "id": "1ae7a62b-04d4-8058-b273-ca9f8a88a15e",
# "title": "03/04/2025 - Tuesday,March 4",
# "type": "page",
# "url": "https://www.notion.so/03-04-2025-Tuesday-March-4-1ae7a62b04d48058b273ca9f8a88a15e",
# },
# ],
# "id": "1ae7a62b-04d4-80cd-8f30-fe64b5354cc0",
# "title": "Daily News byArcade.dev",
# "type": "page",
# "url": "https://www.notion.so/Daily-News-by-Arcade-dev-1ae7a62b04d480cd8f30fe64b5354cc0",
# },
# {
# "id": "1b27a62b-04d4-807f-8c69-d9c7c2d78255",
# "title": "sdf",
# "type": "page",
# "url": "https://www.notion.so/sdf-1b27a62b04d4807f8c69d9c7c2d78255",
# },
# {
# "children": [
# {
# "id": "1ae7a62b-04d4-8140-926b-eb5580022c27",
# "title": "HabitTracker",
# "type": "database",
# "url": "https://www.notion.so/1ae7a62b04d48140926beb5580022c27",
# },
# {
# "id": "1ae7a62b-04d4-818a-b1cc-d9a37b8f2fbb",
# "title": "Tasklist",
# "type": "database",
# "url": "https://www.notion.so/1ae7a62b04d4818ab1ccd9a37b8f2fbb",
# },
# {
# "id": "1ae7a62b-04d4-81d6-ae33-c14127f47b39",
# "title": "Schedule",
# "type": "database",
# "url": "https://www.notion.so/1ae7a62b04d481d6ae33c14127f47b39",
# },
# ],
# "id": "1ae7a62b-04d4-8068-918d-d39ade9183a3",
# "title": "D A I L Y P L A N N ER",
# "type": "page",
# "url": "https://www.notion.so/D-A-I-L-Y-P-L-A-N-N-E-R-1ae7a62b04d48068918dd39ade9183a3",
# },
# {
# "id": "1ae7a62b-04d4-808f-983c-f82f49250af5",
# "title": "Table: Daily News byArcade.dev",
# "type": "database",
# "url": "https://www.notion.so/1ae7a62b04d4808f983cf82f49250af5",
# },
# {
# "children": [
# {
# "id": "1ad7a62b-04d4-8169-92bd-d3232cae5e35",
# "title": "Income(Monthly)",
# "type": "database",
# "url": "https://www.notion.so/1ad7a62b04d4816992bdd3232cae5e35",
# }
# ],
# "id": "1ad7a62b-04d4-803d-855e-fa9d5adef96b",
# "title": "MonthlyBudget",
# "type": "page",
# "url": "https://www.notion.so/Monthly-Budget-1ad7a62b04d4803d855efa9d5adef96b",
# },
# ]
# }

View file

@ -0,0 +1,105 @@
from dataclasses import asdict, dataclass, field
# ------------------------------------------------------
# Parent types.
# See Notion API docs for more information:
# https://developers.notion.com/reference/parent-object
# ------------------------------------------------------
@dataclass
class Parent:
    """Base class for Notion parent references; subclasses fix `type` and add an ID field."""

    # Discriminator naming the kind of parent (e.g. "page_id").
    type: str

    def to_dict(self) -> dict:
        """Return the dataclass fields as a plain dict (via `dataclasses.asdict`)."""
        return asdict(self)
@dataclass
class DatabaseParent(Parent):
    """Parent reference pointing at a database."""

    # ID of the parent database.
    database_id: str
    # Fixed discriminator; excluded from `__init__` so callers cannot override it.
    type: str = field(init=False, default="database_id")
@dataclass
class PageParent(Parent):
    """Parent reference whose ``type`` is fixed to "page_id"."""

    # Identifier of the parent page.
    page_id: str
    # Constant discriminator; kept out of ``__init__`` so it cannot be passed in.
    type: str = field(default="page_id", init=False)
@dataclass
class WorkspaceParent(Parent):
    """Parent reference whose ``type`` is fixed to "workspace"."""

    # Flag carried alongside the "workspace" type; defaults to True.
    workspace: bool = True
    # Constant discriminator; kept out of ``__init__`` so it cannot be passed in.
    type: str = field(default="workspace", init=False)
@dataclass
class BlockParent(Parent):
    """Parent reference whose ``type`` is fixed to "block_id"."""

    # Identifier of the parent block.
    block_id: str
    # Constant discriminator; kept out of ``__init__`` so it cannot be passed in.
    type: str = field(default="block_id", init=False)
def create_parent(parent_data: dict) -> Parent:
    """
    Build the matching ``Parent`` subclass from a Notion parent dictionary.

    See https://developers.notion.com/reference/parent-object for more information
    about the parent object.

    Args:
        parent_data (dict): The parent dictionary; its "type" key selects the subclass.

    Returns:
        Parent: A DatabaseParent, PageParent, WorkspaceParent or BlockParent.

    Raises:
        ValueError: If "type" is missing or is not one of the supported values.
    """
    kind = parent_data.get("type")

    if kind == "database_id":
        # A missing identifier falls back to an empty string.
        return DatabaseParent(database_id=parent_data.get("database_id", ""))
    if kind == "page_id":
        return PageParent(page_id=parent_data.get("page_id", ""))
    if kind == "workspace":
        return WorkspaceParent()
    if kind == "block_id":
        return BlockParent(block_id=parent_data.get("block_id", ""))

    raise ValueError(f"Unknown parent type: {kind}")  # noqa: TRY003
# ------------------------------------------------------
# Property types.
# See Notion API docs for more information:
# https://developers.notion.com/reference/property-object
# and https://developers.notion.com/reference/page-property-values
# ------------------------------------------------------
@dataclass
class PageWithPageParentProperties:
    """Properties for a page whose parent is itself a page."""

    # Plain-text title of the page.
    title: str

    def to_dict(self) -> dict:
        """Return the properties payload: a "title" property holding a single text object."""
        text_object = {"type": "text", "text": {"content": self.title}}
        title_property = {"title": [text_object]}
        return {"title": title_property}
@dataclass
class PageWithDatabaseParentProperties:
    """Placeholder for the properties of a page whose parent is a database (no fields yet)."""

    # TODO: Implement when database parent is supported for `create_page` tool
@dataclass
class DatabaseProperties:
    """Placeholder for database properties (no fields yet)."""

    # TODO: Implement when create_database tool is implemented

View file

@ -0,0 +1,225 @@
from typing import Any, Optional
import httpx
from arcade.sdk import ToolContext
from arcade_notion.constants import ENDPOINTS, NOTION_API_URL, UNTITLED_TITLE
def get_url(endpoint: str, **kwargs: Any) -> str:
    """
    Build the full URL for one of the named Notion endpoints.

    Args:
        endpoint (str): Key used to look up the path template in ENDPOINTS.
        **kwargs: Values substituted into the path template via ``str.format``.

    Returns:
        str: NOTION_API_URL followed by the formatted endpoint path.
    """
    path_template = ENDPOINTS[endpoint]
    path = path_template.format(**kwargs)
    return f"{NOTION_API_URL}{path}"
def get_headers(context: ToolContext) -> dict[str, str]:
    """
    Build the HTTP headers sent with Notion API requests.

    Args:
        context (ToolContext): The tool context from which the auth token is read.

    Returns:
        dict[str, str]: The "Authorization", "Content-Type" and "Notion-Version" headers.
    """
    headers: dict[str, str] = {}
    # NOTE(review): the token is forwarded exactly as returned by the context, with no
    # "Bearer " prefix added here — confirm that is what the API expects.
    headers["Authorization"] = context.get_auth_token_or_empty()
    headers["Content-Type"] = "application/json"
    headers["Notion-Version"] = "2022-06-28"
    return headers
def remove_none_values(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Return a copy of ``payload`` without the entries whose value is None.

    Only ``None`` is dropped; other falsy values such as 0, "" or [] are kept.

    Args:
        payload (dict[str, Any]): The dictionary to filter.

    Returns:
        dict[str, Any]: A new dictionary containing the non-None entries, in the same order.
    """
    cleaned: dict[str, Any] = {}
    for key, value in payload.items():
        if value is None:
            continue
        cleaned[key] = value
    return cleaned
def _join_plain_text(rich_text: Any) -> str:
    """Concatenate the "plain_text" of every rich-text dict in ``rich_text``.

    Anything that is not a list yields an empty string, and entries that are not
    dicts or that lack "plain_text" contribute nothing.
    """
    if not isinstance(rich_text, list):
        return ""
    return "".join(part.get("plain_text", "") for part in rich_text if isinstance(part, dict))


def extract_title(item: dict) -> str:
    """
    Extracts a human-readable title from a page or database, or a block if possible.

    Lookups are tolerant: a missing "object" key, a ``None`` "properties"/"title" value or a
    rich-text entry without "plain_text" no longer raise; they simply contribute no text.

    Args:
        item (dict): The page, database or block object to extract the title from.

    Returns:
        str: The human-readable title of the item, or UNTITLED_TITLE when none can be found.
    """
    object_type = item.get("object")
    properties = item.get("properties")
    if not isinstance(properties, dict):
        properties = {}

    # Case 1: Extract title from a database object.
    if object_type == "database" and "title" in item:
        return _join_plain_text(item["title"])

    if object_type == "page":
        # Case 2: Extract title from a page object that is parented by the workspace or a page.
        title_property = properties.get("title")
        if isinstance(title_property, dict):
            return _join_plain_text(title_property.get("title"))
        # Case 3: Extract title from a page object that is parented by a database. The title
        # property can have any name there, so look for the property whose type is "title".
        for prop in properties.values():
            if isinstance(prop, dict) and prop.get("type") == "title":
                return _join_plain_text(prop.get("title"))

    # Case 4: Extract title from a block object.
    if object_type == "block":
        block_type = item.get("type")
        if block_type == "child_page":
            title: str = (item.get("child_page") or {}).get("title", UNTITLED_TITLE)
            return title
        # For text-based blocks, use the rich text; fall back to the block type when it is empty.
        if block_type in ("paragraph", "heading_1", "heading_2", "heading_3"):
            rich_text = (item.get(block_type) or {}).get("rich_text")
            return _join_plain_text(rich_text) or block_type

    return UNTITLED_TITLE
def simplify_search_result(item: dict) -> dict:
    """
    Reduce a 'search by title' result from the Notion API to a handful of fields.

    Args:
        item (dict): The page or database object to simplify.

    Returns:
        dict: The item's id, object, parent, created_time, last_edited_time, url and
            public_url (each None when absent) plus the title computed by ``extract_title``.
    """
    title = extract_title(item)

    leading_keys = ("id", "object", "parent", "created_time", "last_edited_time")
    simplified = {key: item.get(key) for key in leading_keys}
    simplified["title"] = title
    for key in ("url", "public_url"):
        simplified[key] = item.get(key)
    return simplified
async def get_next_page(
    client: httpx.AsyncClient,
    url: str,
    headers: dict,
    params: Optional[dict] = None,
    cursor: Optional[str] = None,
) -> tuple[dict, bool, str]:
    """
    Retrieves the next page of results from a Notion API endpoint.

    This is a helper function that is useful when paginating through Notion API responses.
    The caller's ``params`` dictionary is never modified; the cursor handling is applied to
    a private copy.

    Args:
        client (httpx.AsyncClient): The HTTP client to use for the request.
        url (str): The URL of the endpoint to request.
        headers (dict): The headers to use for the request.
        params (Optional[dict]): The query parameters to use for the request.
        cursor (Optional[str]): The cursor to send as "start_cursor". When falsy, any
            "start_cursor" present in ``params`` is left out of the request.

    Returns:
        tuple[dict, bool, str]: A tuple containing the decoded response, a boolean indicating
            if there is a next page ("has_more", default False), and the next cursor
            ("next_cursor", which is None when the response does not carry one).

    Raises:
        httpx.HTTPStatusError: Propagated from ``response.raise_for_status()``.
    """
    # Work on a copy so a dict reused by the caller across calls is not mutated.
    request_params = dict(params) if params else {}
    if cursor:
        request_params["start_cursor"] = cursor
    else:
        request_params.pop("start_cursor", None)

    response = await client.get(url, headers=headers, params=request_params)
    response.raise_for_status()
    data = response.json()
    return data, data.get("has_more", False), data.get("next_cursor")
async def get_page_url(context: ToolContext, page_id: str) -> str:
    """
    Look up the URL of a page through the Notion "retrieve_a_page" endpoint.

    Args:
        context (ToolContext): The context used to build the request headers.
        page_id (str): The ID of the page whose URL is wanted.

    Returns:
        str: The page's "url" field, or an empty string when the response status is not 200
            or the field is absent.
    """
    request_url = get_url("retrieve_a_page", page_id=page_id)
    request_headers = get_headers(context)

    async with httpx.AsyncClient() as client:
        response = await client.get(request_url, headers=request_headers)
        if response.status_code == 200:
            page = response.json()
            return page.get("url", "")  # type: ignore[no-any-return]
        # Any non-200 answer is reported as "no URL" instead of raising.
        return ""
def build_workspace_structure(items: list[dict[str, Any]]) -> dict[str, list]:
    """Build a tree structure from a flat list of Notion objects.

    Each item needs the keys "id", "title", "object" and "url"; its optional "parent" dict
    decides where it is attached. An item becomes a child of another item when its parent is
    of type "page_id" or "database_id" and that parent's id is among ``items``. Every other
    item (workspace parent, unknown or missing parent, parent not in the list) is a root.
    The input dictionaries are not modified.

    Args:
        items (list[dict[str, Any]]): A list of Notion objects.

    Returns:
        dict[str, list]: ``{"workspace": [...]}`` where every node holds "id", "title",
            "type" (the item's "object") and "url", plus "children" when it has any.
    """
    # Index shallow copies by id so children can be attached without touching the input.
    nodes: dict[Any, dict[str, Any]] = {}
    for item in items:
        node = dict(item)
        node["children"] = []
        nodes[node["id"]] = node

    roots: list[dict[str, Any]] = []
    for node in nodes.values():
        # "parent" may be absent or explicitly None; both mean "no known parent".
        parent = node.get("parent") or {}
        parent_type = parent.get("type")
        # For page and database parents the id is stored under a key named after the type.
        parent_id = parent.get(parent_type) if parent_type in ("page_id", "database_id") else None
        parent_node = nodes.get(parent_id) if parent_id else None
        if parent_node is not None:
            parent_node["children"].append(node)
        else:
            # Workspace-level item, unrecognized parent type, or parent not in the list.
            roots.append(node)

    def prune_node(node: dict) -> dict:
        """Get rid of all of the unnecessary fields in a node"""
        pruned_node = {
            "id": node["id"],
            "title": node["title"],
            "type": node["object"],
            "url": node["url"],
        }
        # Leaves carry no "children" key at all.
        if node.get("children"):
            pruned_node["children"] = [prune_node(child) for child in node["children"]]
        return pruned_node

    return {"workspace": [prune_node(root) for root in roots]}

View file

@ -0,0 +1,8 @@
import pytest
from arcade.sdk import ToolAuthorizationContext, ToolContext
@pytest.fixture
def mock_context():
    """Provide a ToolContext whose authorization carries a placeholder token."""
    return ToolContext(
        authorization=ToolAuthorizationContext(token="fake-token")  # noqa: S106
    )

View file

@ -0,0 +1,251 @@
from arcade.sdk import ToolCatalog
from arcade.sdk.eval import (
BinaryCritic,
EvalRubric,
EvalSuite,
ExpectedToolCall,
SimilarityCritic,
tool_eval,
)
import arcade_notion
from arcade_notion.tools import create_page, get_page_content_by_id, get_page_content_by_title
# Evaluation rubric shared by the suites in this module (passed as `rubric=` to each EvalSuite).
rubric = EvalRubric(
    fail_threshold=0.85,
    warn_threshold=0.95,
)
# Tool catalog shared by the suites in this module; it is populated from the
# `arcade_notion` package and passed as `catalog=` to each EvalSuite.
catalog = ToolCatalog()
catalog.add_module(arcade_notion)
PAGE_CONTENT = """## Why Build Tools with Arcade?
Arcade solves key challenges for agent developers:
1. **Auth Native to Agents**: Authentication designed for agentic workflows the right token is always available for each user without complex integration work.
2. **Multi-Tenant Tool Calling**: Enable your agent to take actions AS the specific user of the agent
3. **Better Agent Capabilities**: Build tools that securely connect to the services your users want your agent to integrate with (Gmail, Slack, Google Drive, Zoom, etc.) without complex integration code.
4. **Clean Codebase**: Eliminate environment variables full of API keys and complex OAuth implementations from your application code.
5. **Flexible Integration**: Choose your integration approach:
- LLM API for the simplest experience with hundreds of pre-built tools
- Tools API for direct execution control
- Auth API for authentication-only integration
- Framework connectors for LangChain, CrewAI and others
6. **Zero Schema Maintenance**: Tool definitions generate automatically from code annotations and translate to any LLM format.
7. **Built-in Evaluation**: Evaluate your tools across user scenarios, llms, and context with Arcade's tool calling evaluation framework. Ensure your tools are working as expected and are useful for your agents.
8. **Complete Tooling Ecosystem**: Built-in evaluation framework, scalable execution infrastructure, and flexible deployment options (including VPC, Docker, and Kubernetes).
Arcade lets you focus on creating useful tool functionality rather than solving complex authentication, deployment, and integration challenges.
""" # noqa: E501
# A conversation where a user asks the AI to get the content of a page named 'Arcade Notes'
GET_PAGE_CONVERSATION = [
{"role": "user", "content": "get 'Arcade Notes'"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_92Vhl75I8KEKQfjihS7l53DL",
"type": "function",
"function": {
"name": "Notion_GetPageContentByTitle",
"arguments": '{"title":"Arcade Notes"}',
},
}
],
},
{
"role": "tool",
"content": "# Arcade Notes\nFirst, make sure you have these pre-requisites installed on your system: \n- **Python 3.10**\xa0or higherVerify your Python version by running\xa0`python --version`\xa0or\xa0`python3 --version`\xa0in your terminal. \n- **pip**: The Python package installer should be available. It's typically included with Python. \n- **Arcade Account**: Sign up for an\xa0[Arcade account](https://api.arcade.dev/signup?utm_source=docs&utm_medium=page&utm_campaign=custom-tools)\xa0if you haven't already. \nLet's set up Arcade and give it a try! \n### **Obtain an API key** \nInstall the Arcade CLI and SDK and log in. Your Arcade API key will be printed to the console as well as saved to\xa0`~/.arcade/credentials.yaml`. \npip install arcade-aiarcade login\n### **Try\xa0**`arcade chat` \nWith Arcade CLI installed, you can test outour API with the\xa0`arcade chat`\xa0command: \narcade chat\nThis launches a chat with the Arcade Cloud Engine (hosted at\xa0`api.arcade.dev`). All pre-built Arcade tools are available to use. \nFor example, try asking: \nstar the ArcadeAI/arcade-ai repo on Github\nArcade will ask you to authorize with GitHub, and then the AI assistant will star the\xa0[ArcadeAI/arcade-ai](https://github.com/ArcadeAI/arcade-ai)\xa0repo on your behalf. \nYou'll see output similar to this: \nAssistant (gpt-4o):I starred the ArcadeAI/arcade-ai repo on Github for you!\nYou can use Ctrl-C to exit the chat at any time. \n### **Try more tools** \nTry prompting the AI with different requests: \n- Search the web to find when The Winds of Winter will be released \n- Summarize my emails from today \n- What's playing on my Spotify right now? \nTo see all tools available in the Cloud, run: \narcade show\nGreat job! 🎉 You've set up Arcade and used your first tool. 
\n### **Next steps** \n- [Learn about tools](https://docs.arcade.dev/home/use-tools/tools-overview)\xa0andhow they work \n- Learn how to\xa0[call tools with models](https://docs.arcade.dev/home/use-tools/call-tools-with-models) \n- Or, jump ahead to building your own\xa0[custom tools](https://docs.arcade.dev/home/build-tools/create-a-toolkit) \n", # noqa: E501
"tool_call_id": "call_92Vhl75I8KEKQfjihS7l53DL",
"name": "Notion_GetPageContentByTitle",
},
{
"role": "assistant",
"content": "Here are the notes on setting up and using Arcade:\n\n### Pre-requisites\nMake sure you have the following installed on your system:\n-**Python 3.10 or higher**: Verify using `python --version` or `python3 --version` in your terminal.\n- **pip**: The Python package installer, typically included with Python.\n- **Arcade Account**: Sign up for an [Arcade account](https://api.arcade.dev/signup?utm_source=docs&utm_medium=page&utm_campaign=custom-tools) if you haven't already.\n\n### Setting up Arcade\n\n#### Obtain an API key\n1. Install the Arcade CLI and SDK.\n2. Log in to Arcade: Run the command `arcade login`.\n3. Your Arcade API key will be printed to the console and saved in `~/.arcade/credentials.yaml`.\n\n#### Try `arcade chat`\n- With Arcade CLI installed, you can test the API using `arcade chat`.\n- This command launches a chat with the Arcade Cloud Engine (hosted at `api.arcade.dev`), giving you access to all pre-built Arcade tools.\n \nExample prompt: \n- Ask to star the `ArcadeAI/arcade-ai` repo on GitHub. The AI assistant will star the repo for you after authorization.\n\nEvent example output:\n```\nAssistant (gpt-4o): I starred the ArcadeAI/arcade-ai repo on Github for you!\n```\n\nUse `Ctrl-C` to exit the chat anytime.\n\n### Explore More Tools\n- Try different requests like searching the web, summarizing emails, or checking your current Spotify play.\n- Run `arcade show` to see all available tools in the Cloud.\n\n### Next Steps\n- [Learn about tools](https://docs.arcade.dev/home/use-tools/tools-overview) and how they function.\n- Understand how to [call tools with models](https://docs.arcade.dev/home/use-tools/call-tools-with-models).\n- Begin building your own [custom tools](https://docs.arcade.dev/home/build-tools/create-a-toolkit).\n\nCongratulations on setting up Arcade and trying your first tool! 🎉", # noqa: E501
},
]
@tool_eval()
def create_page_eval_suite() -> EvalSuite:
    """Assemble the evaluation suite for the `create_page` tool.

    The suite holds an easy, a medium and a hard case. Every case expects one
    `create_page` call and scores the `parent_title`, `title` and `content` arguments.
    """

    def build_critics() -> list:
        # Each case gets its own critic objects, weighted 0.34 / 0.33 / 0.33.
        return [
            BinaryCritic(critic_field="parent_title", weight=0.34),
            SimilarityCritic(critic_field="title", weight=0.33, similarity_threshold=0.95),
            SimilarityCritic(critic_field="content", weight=0.33, similarity_threshold=0.95),
        ]

    system_message = (
        "You are an AI assistant that has access to the user's Notion workspace. "
        "You can take actions on the user's Notion workspace on behalf of the user."
    )
    suite = EvalSuite(
        name="Notion Create Page Evaluation",
        system_message=system_message,
        catalog=catalog,
        rubric=rubric,
    )

    # Easy case: parent, title and content are all spelled out in the request.
    easy_call = ExpectedToolCall(
        func=create_page,
        args={
            "parent_title": "Daily Standup",
            "title": "07/11/2027",
            "content": "* drank a slurpie",
        },
    )
    suite.add_case(
        name="Create page easy difficulty",
        user_message=(
            "Create a page with the title '07/11/2027' and content '* drank a slurpie' "
            "under the parent page 'Daily Standup'."
        ),
        expected_tool_calls=[easy_call],
        critics=build_critics(),
    )

    # The medium and hard cases expect the same arguments.
    arcade_notes_args = {
        "parent_title": "Arcade Notes",
        "title": "Why Use Arcade?",
        "content": PAGE_CONTENT,
    }

    # Medium case: same request shape as the easy case, with a long markdown body.
    medium_call = ExpectedToolCall(func=create_page, args=dict(arcade_notes_args))
    suite.add_case(
        name="Create page medium difficulty",
        user_message=(
            f"Create a page with the title 'Why Use Arcade?' and content {PAGE_CONTENT}"
            "under the parent page 'Arcade Notes'."
        ),
        expected_tool_calls=[medium_call],
        critics=build_critics(),
    )

    # Hard case: the parent page is not named in the request; the prior conversation
    # supplied through `additional_messages` is about 'Arcade Notes'.
    hard_call = ExpectedToolCall(func=create_page, args=dict(arcade_notes_args))
    suite.add_case(
        name="Create page hard difficulty",
        user_message=(f"Add {PAGE_CONTENT} as a subpage. Name it 'Why Use Arcade?'"),
        expected_tool_calls=[hard_call],
        critics=build_critics(),
        additional_messages=GET_PAGE_CONVERSATION,
    )

    return suite
@tool_eval()
def get_page_content_eval_suite() -> EvalSuite:
    """Assemble the evaluation suite for the page-content tools.

    The cases expect calls to `get_page_content_by_id` and/or `get_page_content_by_title`
    and score the `page_id` and `title` arguments with binary critics.
    """
    system_message = (
        "You are an AI assistant that has access to the user's Notion workspace. "
        "You can take actions on the user's Notion workspace on behalf of the user."
    )
    suite = EvalSuite(
        name="Notion Get Page Content By ID Evaluation",
        system_message=system_message,
        catalog=catalog,
        rubric=rubric,
    )

    def by_id_call(page_id: str) -> ExpectedToolCall:
        return ExpectedToolCall(func=get_page_content_by_id, args={"page_id": page_id})

    def by_title_call(title: str) -> ExpectedToolCall:
        return ExpectedToolCall(func=get_page_content_by_title, args={"title": title})

    # Easy case: the page id is given verbatim.
    suite.add_case(
        name="Get page content by id easy difficulty",
        user_message="Get the content of the page with id 1b37a62b04d48079a902ce69ed7e7240",
        expected_tool_calls=[by_id_call("1b37a62b04d48079a902ce69ed7e7240")],
        critics=[BinaryCritic(critic_field="page_id", weight=1)],
    )

    # Medium case: one page referenced by id and another by title in the same request.
    suite.add_case(
        name="Get page content medium difficulty",
        user_message=(
            "Summarize the main points in 1b37a62b04d48079a902ce69ed7e7240. "
            "Also, does 'Tool Calling with Arcade' actually talk about tools?"
        ),
        expected_tool_calls=[
            by_id_call("1b37a62b04d48079a902ce69ed7e7240"),
            by_title_call("Tool Calling with Arcade"),
        ],
        critics=[
            BinaryCritic(critic_field="page_id", weight=0.5),
            BinaryCritic(critic_field="title", weight=0.5),
        ],
    )

    # Hard case: follows a prior conversation (additional_messages) and names two pages
    # by title, each expected to be fetched with its own call.
    suite.add_case(
        name="Get page content hard difficulty",
        user_message=(
            "Compare it's main points against 'Tool Calling with Arcade' and "
            "'Tool Execution with Arcade'"
        ),
        expected_tool_calls=[
            by_title_call("Tool Calling with Arcade"),
            by_title_call("Tool Execution with Arcade"),
        ],
        critics=[BinaryCritic(critic_field="title", weight=1)],
        additional_messages=GET_PAGE_CONVERSATION,
    )

    return suite

View file

@ -0,0 +1,250 @@
from arcade.sdk import ToolCatalog
from arcade.sdk.eval import (
BinaryCritic,
EvalRubric,
EvalSuite,
ExpectedToolCall,
SimilarityCritic,
tool_eval,
)
import arcade_notion
from arcade_notion.enums import ObjectType
from arcade_notion.tools import get_object_metadata, get_workspace_structure, search_by_title
# Evaluation rubric shared by the suites in this module (passed as `rubric=` to each EvalSuite).
rubric = EvalRubric(
    fail_threshold=0.8,
    warn_threshold=0.9,
)
# Tool catalog shared by the suites in this module; it is populated from the
# `arcade_notion` package and passed as `catalog=` to each EvalSuite.
catalog = ToolCatalog()
catalog.add_module(arcade_notion)
# Additional messages
TOOL_CALLING_CONVERSATION = [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "Hello! How can I assist you today?"},
{"role": "user", "content": "explain LLM tool calling"},
{
"role": "assistant",
"content": "Tool calling in the context of Large Language Models (LLMs) refers to the process where the model uses external tools, plugins, orfunctions to gather, process, or enhance information. This allows the model to access more current or specific data and perform actions that are beyond itstrained capabilities.\n\nHere's a breakdown of how this works:\n\n1. **Identification of Needs**: The LLM can identify when a task or query requires dataor actions that are not within its built-in knowledge. This could be due to the data being too recent, too specialized, or involving actions that requireinterfacing with external systems.\n\n2. **Tool Selection**: The LLM selects appropriate tools or functions that are registered and permitted for use. Thisselection is based on the task's requirements, such as retrieving data, performing calculations, or managing content.\n\n3. **Parameter Configuration**:The LLM prepares the necessary input parameters for the tools. These parameters tell the tools what specific actions to take or what data toretrieve.\n\n4. **Execution**: The tools are executed. Some tools run asynchronously, allowing the LLM to perform other tasks while waiting for theresults.\n\n5. **Result Integration**: The LLM integrates the results from the tools back into the conversation or task. It uses this information to answerquestions, perform further analysis, or update content as needed.\n\n6. **Feedback Loop**: Often, the results are looped back into the model's reasoningprocess, which can then adjust its line of questioning or actions based on the new data.\n\nThis system enhances the LLM's flexibility, making itapplicable to a wider range of real-world applications by utilizing up-to-date and specialized information through these tools.", # noqa: E501
},
]
@tool_eval()
def search_by_title_eval_suite() -> EvalSuite:
    """Assemble the evaluation suite for the `search_by_title` tool.

    Each case expects one or more `search_by_title` calls and scores the `query`
    argument by similarity and the `select` argument by exact match.
    """
    system_message = (
        "You are an AI assistant that has access to the user's Notion workspace. "
        "You can take actions on the user's Notion workspace on behalf of the user."
    )
    suite = EvalSuite(
        name="Notion Search Tools Evaluation",
        system_message=system_message,
        catalog=catalog,
        rubric=rubric,
    )

    # Easy case: the title and the object kind (page) are stated explicitly.
    easy_expected = [
        ExpectedToolCall(
            func=search_by_title,
            args={"query": "Daily Standup", "select": ObjectType.PAGE},
        )
    ]
    easy_critics = [
        SimilarityCritic(critic_field="query", weight=0.6, similarity_threshold=0.95),
        BinaryCritic(critic_field="select", weight=0.4),
    ]
    suite.add_case(
        name="Search by title easy difficulty",
        user_message="Search for my page with the title 'Daily Standup'",
        expected_tool_calls=easy_expected,
        critics=easy_critics,
    )

    # Medium case: the query is buried in a chatty message and the object kind is left
    # open ("page or db"), so `select` is expected to be None.
    medium_expected = [
        ExpectedToolCall(
            func=search_by_title,
            args={"query": "tool calling", "select": None},
        )
    ]
    medium_critics = [
        SimilarityCritic(critic_field="query", weight=0.6),
        BinaryCritic(critic_field="select", weight=0.4),
    ]
    suite.add_case(
        name="Search by title medium difficulty",
        user_message=(
            "so i was just thinking about LLMs and how to create an agent. "
            "I remember that tools are important for some reason. "
            "do i have a page or db about tool calling?"
        ),
        expected_tool_calls=medium_expected,
        critics=medium_critics,
    )

    # Hard case: the request refers to the 2nd, 3rd and 5th points of the breakdown in the
    # prior conversation (additional_messages); one search is expected per point.
    referenced_points = ("Tool Selection", "Parameter Configuration", "Result Integration")
    hard_expected = [
        ExpectedToolCall(func=search_by_title, args={"query": point, "select": None})
        for point in referenced_points
    ]
    hard_critics = [
        SimilarityCritic(critic_field="query", weight=0.8),
        BinaryCritic(critic_field="select", weight=0.2),
    ]
    suite.add_case(
        name="Search by title hard difficulty",
        user_message=(
            "do i have any notes about any of those breakdown points? "
            "Actually, do I have any notes about the 2nd, 3rd, or 5th points?"
        ),
        expected_tool_calls=hard_expected,
        critics=hard_critics,
        additional_messages=TOOL_CALLING_CONVERSATION,
    )

    return suite
@tool_eval()
def get_object_metadata_eval_suite() -> EvalSuite:
    """Assemble the evaluation suite for the `get_object_metadata` tool.

    The easy and medium cases identify the object by title; the hard case identifies it by
    the id embedded in a Notion URL. `object_type` is scored by exact match in every case.
    """
    system_message = (
        "You are an AI assistant that has access to the user's Notion workspace. "
        "You can take actions on the user's Notion workspace on behalf of the user."
    )
    suite = EvalSuite(
        name="Notion Get Object Metadata Evaluation",
        system_message=system_message,
        catalog=catalog,
        rubric=rubric,
    )

    def title_critics() -> list:
        # Critics used by the two title-based cases; built anew for each case.
        return [
            SimilarityCritic(critic_field="object_title", weight=0.8, similarity_threshold=0.95),
            BinaryCritic(critic_field="object_type", weight=0.2),
        ]

    # Easy case: the request names both the title and the object kind (page).
    easy_call = ExpectedToolCall(
        func=get_object_metadata,
        args={"object_title": "Daily Standup", "object_type": ObjectType.PAGE},
    )
    suite.add_case(
        name="Get object metadata easy difficulty",
        user_message="Get any metadata about my page with the title 'Daily Standup'",
        expected_tool_calls=[easy_call],
        critics=title_critics(),
    )

    # Medium case: only the title is given, so `object_type` is expected to be None.
    medium_call = ExpectedToolCall(
        func=get_object_metadata,
        args={"object_title": "Daily Standup", "object_type": None},
    )
    suite.add_case(
        name="Get object metadata medium difficulty",
        user_message="Get the id, url, and last edited time of 'Daily Standup'",
        expected_tool_calls=[medium_call],
        critics=title_critics(),
    )

    # Hard case: the object is referenced through a URL inside a follow-up message; the
    # expected `object_id` is the id portion of that URL.
    hard_call = ExpectedToolCall(
        func=get_object_metadata,
        args={
            "object_id": "be633bf1dfa0436db259571129a590e5",
            "object_type": ObjectType.PAGE,
        },
    )
    suite.add_case(
        name="Get object metadata hard difficulty",
        user_message=(
            "oh I have page about that second point. "
            "This page here https://www.notion.so/be633bf1dfa0436db259571129a590e5. "
            "When was it created?"
        ),
        expected_tool_calls=[hard_call],
        critics=[
            BinaryCritic(critic_field="object_id", weight=0.8),
            BinaryCritic(critic_field="object_type", weight=0.2),
        ],
        additional_messages=TOOL_CALLING_CONVERSATION,
    )

    return suite
@tool_eval()
def get_workspace_structure_eval_suite() -> EvalSuite:
    """Assemble the evaluation suite for the `get_workspace_structure` tool.

    All three cases expect a single argument-less `get_workspace_structure` call and
    define no critics; they differ only in how the request is phrased.
    """
    system_message = (
        "You are an AI assistant that has access to the user's Notion workspace. "
        "You can take actions on the user's Notion workspace on behalf of the user."
    )
    suite = EvalSuite(
        name="Notion Get Workspace Structure Evaluation",
        system_message=system_message,
        catalog=catalog,
        rubric=rubric,
    )

    # (case name, user message) in easy -> medium -> hard order.
    cases = [
        (
            "Get workspace structure easy difficulty",
            "Get my workspace tree structure",
        ),
        (
            "Get workspace structure medium difficulty",
            "I'm trying to figure out where my 'Daily Standup' page is. "
            "Can you help me find it?",
        ),
        (
            "Get workspace structure hard difficulty",
            "list pages that are subpages of my 'Daily Standup' page",
        ),
    ]
    for case_name, user_message in cases:
        suite.add_case(
            name=case_name,
            user_message=user_message,
            expected_tool_calls=[
                ExpectedToolCall(func=get_workspace_structure, args={}),
            ],
        )

    return suite

View file

@ -0,0 +1,39 @@
[tool.poetry]
name = "arcade_notion"
version = "0.0.1"
description = "LLM tools for essential Notion interactions such as creating, updating, retrieving, and searching pages."
authors = ["ArcadeAI <dev@arcade.dev>"]
[tool.poetry.dependencies]
python = "^3.10"
arcade-ai = "^1.0.5"
[tool.poetry.dev-dependencies]
# NOTE(review): the tests in this toolkit are marked with `@pytest.mark.asyncio`, but
# pytest-asyncio is not declared here — confirm it is provided some other way, or add it.
pytest = "^8.3.0"
pytest-cov = "^4.0.0"
mypy = "^1.5.1"
pre-commit = "^3.4.0"
tox = "^4.11.1"
ruff = "^0.7.4"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.mypy]
files = ["arcade_notion/**/*.py"]
python_version = "3.10"
disallow_untyped_defs = "True"
disallow_any_unimported = "True"
no_implicit_optional = "True"
check_untyped_defs = "True"
warn_return_any = "True"
warn_unused_ignores = "True"
show_error_codes = "True"
ignore_missing_imports = "True"
[tool.pytest.ini_options]
testpaths = ["tests"]
[tool.coverage.report]
skip_empty = true

View file

View file

@ -0,0 +1,161 @@
import pytest
from arcade_notion.block_to_markdown_converter import BlockToMarkdownConverter
@pytest.mark.asyncio
async def test_convert_paragraph():
    """A paragraph block with unannotated text converts to that text plus a line ending."""
    plain_annotations = {
        "bold": False,
        "italic": False,
        "code": False,
        "strikethrough": False,
        "underline": False,
        "color": "default",
    }
    rich_text_item = {
        "plain_text": "Hello, world!",
        "annotations": plain_annotations,
        "text": {"content": "Hello, world!", "link": None},
        "type": "text",
    }
    block = {
        "type": "paragraph",
        "paragraph": {"rich_text": [rich_text_item], "color": "default"},
    }

    markdown = await BlockToMarkdownConverter(context=None).convert_block(block)

    assert markdown == "Hello, world! \n"
@pytest.mark.asyncio
async def test_convert_heading_1():
    """A heading_1 block with bold text converts to a "#" heading wrapping the bold text."""
    bold_annotations = {
        "bold": True,
        "italic": False,
        "code": False,
        "strikethrough": False,
        "underline": False,
        "color": "default",
    }
    rich_text_item = {
        "plain_text": "Heading Test",
        "annotations": bold_annotations,
        "text": {"content": "Heading Test", "link": None},
        "type": "text",
    }
    block = {
        "type": "heading_1",
        "heading_1": {"rich_text": [rich_text_item], "color": "default"},
    }

    markdown = await BlockToMarkdownConverter(context=None).convert_block(block)

    assert markdown == "# **Heading Test** \n"
@pytest.mark.asyncio
async def test_convert_bulleted_list_item():
    """A bulleted_list_item block converts to a "- " prefixed line."""
    plain_annotations = {
        "bold": False,
        "italic": False,
        "code": False,
        "strikethrough": False,
        "underline": False,
        "color": "default",
    }
    rich_text_item = {
        "plain_text": "list item",
        "annotations": plain_annotations,
        "text": {"content": "list item", "link": None},
        "type": "text",
    }
    block = {
        "type": "bulleted_list_item",
        "bulleted_list_item": {"rich_text": [rich_text_item], "color": "default"},
    }

    markdown = await BlockToMarkdownConverter(context=None).convert_block(block)

    assert markdown == "- list item \n"
@pytest.mark.asyncio
async def test_convert_equation():
    """An equation block converts to its expression wrapped in "$$" delimiters."""
    equation_block = {"type": "equation", "equation": {"expression": "x+1=2"}}

    markdown = await BlockToMarkdownConverter(context=None).convert_block(equation_block)

    assert markdown == "$$ x+1=2 $$ \n"
@pytest.mark.asyncio
async def test_convert_child_page(monkeypatch):
    """A child_page block converts to a markdown link built from its title and page URL.

    `get_page_url` is replaced in the converter module so the URL is derived from the block
    id without any request being made.
    """

    async def fake_get_page_url(context, block_id):
        return f"http://example.com/{block_id}"

    monkeypatch.setattr("arcade_notion.block_to_markdown_converter.get_page_url", fake_get_page_url)

    plain_annotations = {
        "bold": False,
        "italic": False,
        "code": False,
        "strikethrough": False,
        "underline": False,
        "color": "default",
    }
    rich_text_item = {
        "plain_text": "Child Title",
        "annotations": plain_annotations,
        "text": {"content": "Child Title", "link": None},
        "type": "text",
    }
    block = {
        "type": "child_page",
        "id": "child123",
        "child_page": {"rich_text": [rich_text_item], "title": "Child Title"},
    }

    markdown = await BlockToMarkdownConverter(context=None).convert_block(block)

    assert markdown == "[Child Title](http://example.com/child123) \n"
@pytest.mark.asyncio
async def test_fallback_plaintext():
    """A block of an unrecognized type converts to the bare plain text of its rich text."""
    plain_annotations = {
        "bold": False,
        "italic": False,
        "code": False,
        "strikethrough": False,
        "underline": False,
        "color": "default",
    }
    rich_text_item = {
        "plain_text": "Fallback text",
        "annotations": plain_annotations,
        "text": {"content": "Fallback text", "link": None},
        "type": "text",
    }
    block = {
        "type": "unsupported-type",
        "unsupported-type": {"rich_text": [rich_text_item]},
    }

    markdown = await BlockToMarkdownConverter(context=None).convert_block(block)

    assert markdown == "Fallback text"

View file

@ -0,0 +1,169 @@
import pytest
# Canned stand-in for the value returned by get_next_page: one paragraph block
# that has no children.
# NOTE(review): the trailing (False, None) presumably mean "no further pages"
# and "no next cursor" -- confirm against get_next_page's return contract.
_simple_rich_text = {
    "plain_text": "Hello World",
    "type": "text",
    "text": {"content": "Hello World", "link": None},
    "annotations": {
        "bold": False,
        "italic": False,
        "underline": False,
        "strikethrough": False,
        "code": False,
        "color": "default",
    },
    "href": None,
}
_simple_block = {
    "object": "block",
    "id": "block1",
    "has_children": False,
    "type": "paragraph",
    "paragraph": {"rich_text": [_simple_rich_text]},
}
fake_get_next_page_simple = ({"results": [_simple_block]}, False, None)
# Canned stand-in for the value returned by get_next_page: a single paragraph
# block ("parent_block") flagged as having children. The children themselves
# are not part of this payload.
_nested_parent_rich_text = {
    "plain_text": "Parent Block",
    "type": "text",
    "text": {"content": "Parent Block", "link": None},
    "annotations": {
        "bold": False,
        "italic": False,
        "underline": False,
        "strikethrough": False,
        "code": False,
        "color": "default",
    },
    "href": None,
}
_nested_parent_block = {
    "object": "block",
    "id": "parent_block",
    "has_children": True,
    "type": "paragraph",
    "paragraph": {"rich_text": [_nested_parent_rich_text]},
}
fake_get_next_page_nested = ({"results": [_nested_parent_block]}, False, None)
# Canned stand-in for the value returned by get_next_page when the children of
# "parent_block" are requested: a single childless paragraph ("child_block").
_parent_block_child_rich_text = {
    "plain_text": "Child Block",
    "type": "text",
    "text": {"content": "Child Block", "link": None},
    "annotations": {
        "bold": False,
        "italic": False,
        "underline": False,
        "strikethrough": False,
        "code": False,
        "color": "default",
    },
    "href": None,
}
_parent_block_child = {
    "object": "block",
    "id": "child_block",
    "has_children": False,
    "type": "paragraph",
    "paragraph": {"rich_text": [_parent_block_child_rich_text]},
}
fake_get_next_page_parent_block = ({"results": [_parent_block_child]}, False, None)
@pytest.fixture
def setup_notion_pages(monkeypatch):
    """Return the ``pages`` tools module with ``get_headers`` and ``get_url`` stubbed out."""
    from arcade_notion.tools import pages

    def stub_get_headers(ctx):
        # Fixed header set; the context argument is ignored.
        return {"Authorization": "Bearer test"}

    def stub_get_url(endpoint, block_id=None):
        # The endpoint is ignored; only the block id ends up in the dummy URL,
        # which lets tests tell requests apart by the id they target.
        return f"https://dummy/{block_id}"

    monkeypatch.setattr(pages, "get_headers", stub_get_headers)
    monkeypatch.setattr(pages, "get_url", stub_get_url)
    return pages
@pytest.mark.asyncio
async def test_get_page_content_by_id_simple(mock_context, monkeypatch, setup_notion_pages):
    """A page holding one childless paragraph renders as a title heading plus that paragraph."""
    notion_pages = setup_notion_pages

    async def stub_get_object_metadata(context, object_id=None, **kwargs):
        # Minimal page metadata whose title is 'Test Page'.
        title_property = {"title": [{"plain_text": "Test Page"}]}
        return {
            "id": object_id,
            "object": "page",
            "properties": {"title": title_property},
        }

    async def stub_get_next_page(client, url, headers, params, cursor):
        # Every request is answered with the same single-block payload.
        return fake_get_next_page_simple

    monkeypatch.setattr(notion_pages, "get_object_metadata", stub_get_object_metadata)
    monkeypatch.setattr(notion_pages, "get_next_page", stub_get_next_page)

    markdown = await notion_pages.get_page_content_by_id(mock_context, "test_page_id")

    assert markdown == "# Test Page\nHello World \n"
@pytest.mark.asyncio
async def test_get_page_content_by_id_nested(mock_context, monkeypatch, setup_notion_pages):
    """A parent paragraph with one child paragraph renders both under the page title."""
    notion_pages = setup_notion_pages

    async def stub_get_object_metadata(context, object_id=None, **kwargs):
        # Minimal page metadata whose title is 'Test Nested'.
        title_property = {"title": [{"plain_text": "Test Nested"}]}
        return {
            "id": object_id,
            "object": "page",
            "properties": {"title": title_property},
        }

    # Requests are told apart by URL: the page itself yields the parent block,
    # the parent block yields its child, anything else yields no results.
    canned_by_url = {
        "https://dummy/test_nested": fake_get_next_page_nested,
        "https://dummy/parent_block": fake_get_next_page_parent_block,
    }

    async def stub_get_next_page(client, url, headers, params, cursor):
        return canned_by_url.get(url, ({"results": []}, False, None))

    monkeypatch.setattr(notion_pages, "get_object_metadata", stub_get_object_metadata)
    monkeypatch.setattr(notion_pages, "get_next_page", stub_get_next_page)

    markdown = await notion_pages.get_page_content_by_id(mock_context, "test_nested")

    assert markdown == "# Test Nested\nParent Block \n Child Block \n"

View file

@ -0,0 +1,6 @@
import pytest
@pytest.mark.asyncio
async def test_search_by_title() -> None:
    """Placeholder: contains no assertions yet, so it always passes."""
    # TODO: add real coverage for searching by title.
    return None

View file

@ -0,0 +1,86 @@
import pytest
from arcade_notion.utils import simplify_search_result
# (search result item, title expected after simplification) pairs.
_SIMPLIFY_SEARCH_RESULT_CASES = [
    # Case 1: database object carrying a top-level "title" list.
    (
        {
            "id": "db1",
            "object": "database",
            "title": [{"plain_text": "Database Title"}],
            "parent": {"type": "workspace", "workspace": True},
            "created_time": "2021-01-01T00:00:00.000Z",
            "last_edited_time": "2021-01-02T00:00:00.000Z",
            "url": "https://notion.so/database/db1",
            "public_url": "https://notion.so/database/public/db1",
        },
        "Database Title",
    ),
    # Case 2: page whose title lives in a custom-named property ("Income Item")
    # of type "title".
    (
        {
            "id": "page1",
            "object": "page",
            "properties": {
                "Income Item": {
                    "id": "title",
                    "title": [
                        {
                            "annotations": {
                                "bold": False,
                                "code": False,
                                "color": "default",
                                "italic": False,
                                "strikethrough": False,
                                "underline": False,
                            },
                            "href": None,
                            "plain_text": "Page title with database parent",
                            "text": {
                                "content": "Page title with database parent",
                                "link": None,
                            },
                            "type": "text",
                        }
                    ],
                    "type": "title",
                },
            },
            "parent": {"database_id": "db1"},
            "created_time": "2021-01-03T00:00:00.000Z",
            "last_edited_time": "2021-01-04T00:00:00.000Z",
            "url": "https://notion.so/page/page1",
            "public_url": "https://notion.so/page/public/page1",
        },
        "Page title with database parent",
    ),
    # Case 3: page whose title lives in a property literally named "title".
    (
        {
            "id": "page2",
            "object": "page",
            "properties": {"title": {"title": [{"plain_text": "Page Title from title prop"}]}},
            "parent": {"page_id": "parent_id"},
            "created_time": "2021-01-05T00:00:00.000Z",
            "last_edited_time": "2021-01-06T00:00:00.000Z",
            "url": "https://notion.so/page/page2",
            "public_url": "https://notion.so/page/public/page2",
        },
        "Page Title from title prop",
    ),
]

# Keys the test expects to be carried over from the raw item unchanged.
_SIMPLIFY_PASSTHROUGH_FIELDS = (
    "id",
    "object",
    "parent",
    "created_time",
    "last_edited_time",
    "url",
    "public_url",
)


@pytest.mark.parametrize("item, expected_title", _SIMPLIFY_SEARCH_RESULT_CASES)
def test_simplify_search_result(item, expected_title):
    """The simplified result exposes the extracted title and mirrors the item's other fields."""
    simplified = simplify_search_result(item)

    assert simplified["title"] == expected_title
    for field in _SIMPLIFY_PASSTHROUGH_FIELDS:
        assert simplified[field] == item.get(field)