arcade-mcp/libs/arcade-cli/arcade_cli/toolkit_docs/docs_builder.py

import json
import os
import pprint
from enum import Enum
from typing import Any, Callable, cast

import openai
from arcade_core import auth as auth_module
from arcade_core.schema import (
    ToolAuthRequirement,
    ToolDefinition,
    ToolInput,
    ToolSecretRequirement,
)

from arcade_cli.toolkit_docs.templates import (
    ENUM_ITEM,
    ENUM_MDX,
    ENUM_VALUE,
    GENERIC_PROVIDER_CONFIG,
    TABBED_EXAMPLES_LIST,
    TABLE_OF_CONTENTS,
    TABLE_OF_CONTENTS_ITEM,
    TOOL_CALL_EXAMPLE_JS,
    TOOL_CALL_EXAMPLE_PY,
    TOOL_PARAMETER,
    TOOL_SPEC,
    TOOL_SPEC_SECRETS,
    TOOLKIT_FOOTER,
    TOOLKIT_FOOTER_OAUTH2,
    TOOLKIT_HEADER,
    TOOLKIT_PAGE,
    WELL_KNOWN_PROVIDER_CONFIG,
)
from arcade_cli.toolkit_docs.utils import (
    clean_fully_qualified_name,
    find_enum_by_options,
    get_toolkit_auth_type,
    is_well_known_provider,
    pascal_to_snake_case,
)


def build_toolkit_mdx_path(docs_section: str, docs_root_dir: str, toolkit_name: str) -> str:
    """Return the file path of a toolkit's main MDX page.

    The path has the shape
    ``<docs_root_dir>/pages/toolkits/<docs_section>/<toolkit_name lowercased>.mdx``.

    Args:
        docs_section: Sub-directory of ``pages/toolkits`` the toolkit belongs to.
        docs_root_dir: Root directory of the documentation project.
        toolkit_name: Toolkit name; it is lowercased to form the file name.

    Returns:
        The joined path as a string. Nothing is created on disk.
    """
    mdx_filename = f"{toolkit_name.lower()}.mdx"
    section_dir = os.path.join(docs_root_dir, "pages", "toolkits", docs_section)
    return os.path.join(section_dir, mdx_filename)


def build_reference_mdx_path(docs_section: str, docs_root_dir: str, toolkit_name: str) -> str:
    """Return the file path of a toolkit's ``reference.mdx`` page.

    The path has the shape
    ``<docs_root_dir>/pages/toolkits/<docs_section>/<toolkit_name lowercased>/reference.mdx``.

    Args:
        docs_section: Sub-directory of ``pages/toolkits`` the toolkit belongs to.
        docs_root_dir: Root directory of the documentation project.
        toolkit_name: Toolkit name; it is lowercased to form the directory name.

    Returns:
        The joined path as a string. Nothing is created on disk.
    """
    toolkit_dir = os.path.join(
        docs_root_dir, "pages", "toolkits", docs_section, toolkit_name.lower()
    )
    return os.path.join(toolkit_dir, "reference.mdx")


def build_example_path(example_filename: str, docs_root_dir: str, toolkit_name: str) -> str:
    """Return the file path where a tool-call example file is stored.

    The path has the shape
    ``<docs_root_dir>/public/examples/integrations/toolkits/<toolkit_name lowercased>/<example_filename>``.

    Args:
        example_filename: File name of the example (used as-is).
        docs_root_dir: Root directory of the documentation project.
        toolkit_name: Toolkit name; it is lowercased to form the directory name.

    Returns:
        The joined path as a string. Nothing is created on disk.
    """
    examples_root = os.path.join(docs_root_dir, "public", "examples", "integrations", "toolkits")
    toolkit_examples_dir = os.path.join(examples_root, toolkit_name.lower())
    return os.path.join(toolkit_examples_dir, example_filename)


def build_toolkit_mdx(
    toolkit_dir: str,
    tools: list[ToolDefinition],
    docs_section: str,
    enums: dict[str, type[Enum]],
    pip_package_name: str,
    openai_model: str,
    toolkit_header_template: str = TOOLKIT_HEADER,
    toolkit_page_template: str = TOOLKIT_PAGE,
) -> tuple[str, str]:
    """Assemble the MDX documents for a toolkit.

    Toolkit-wide metadata (name, version, authorization requirement) is read
    from the first entry of ``tools``. The header description is obtained from
    ``generate_toolkit_description``, which calls the OpenAI API.

    Args:
        toolkit_dir: Path of the toolkit; the header's ``toolkit_dirname`` is
            derived from it.
        tools: Tool definitions of the toolkit. Must contain at least one
            entry, since ``tools[0]`` is read unconditionally.
        docs_section: Docs section the toolkit page lives under.
        enums: Known enums by name, used to resolve enum-typed parameters.
        pip_package_name: Package name rendered into the header and footer.
        openai_model: Model name used to generate the toolkit description.
        toolkit_header_template: ``str.format`` template for the page header.
        toolkit_page_template: ``str.format`` template for the whole page.

    Returns:
        A ``(reference_mdx, toolkit_page_mdx)`` tuple. ``reference_mdx`` is an
        empty string when no tool parameter references an enum.
    """
    first_tool = tools[0]
    name = first_tool.toolkit.name
    version = first_tool.toolkit.version
    authorization = first_tool.requirements.authorization
    auth_type = get_toolkit_auth_type(authorization)
    # NOTE(review): basename(dirname(path)) yields the last component of
    # `toolkit_dir` only when it ends with a path separator; otherwise it
    # yields the parent directory's name. Confirm what callers pass.
    toolkit_dirname = os.path.basename(os.path.dirname(toolkit_dir))

    name_description_pairs = [(tool.name, tool.description) for tool in tools]
    description = generate_toolkit_description(name, name_description_pairs, openai_model)
    header = toolkit_header_template.format(
        toolkit_title=name,
        description=description,
        pip_package_name=pip_package_name,
        toolkit_dirname=toolkit_dirname,
        auth_type=auth_type,
        version=version,
    )

    table_of_contents = build_table_of_contents(tools)
    footer = build_footer(name, pip_package_name, authorization)
    referenced_enums, tools_specs = build_tools_specs(tools, docs_section, enums)

    # The reference page is only produced when at least one enum is referenced.
    if referenced_enums:
        reference_mdx = build_reference_mdx(name, referenced_enums)
    else:
        reference_mdx = ""

    toolkit_page = toolkit_page_template.format(
        header=header,
        table_of_contents=table_of_contents,
        tools_specs=tools_specs,
        footer=footer,
    )
    return reference_mdx, toolkit_page


def build_reference_mdx(
    toolkit_name: str,
    referenced_enums: list[tuple[str, type[Enum]]],
    enum_item_template: str = ENUM_ITEM,
    enum_value_template: str = ENUM_VALUE,
    enum_mdx_template: str = ENUM_MDX,
) -> str:
    """Render the reference MDX page listing the enums used by a toolkit.

    Args:
        toolkit_name: Toolkit name passed to ``enum_mdx_template``.
        referenced_enums: ``(enum_name, enum_class)`` pairs to document, in
            the order they should appear.
        enum_item_template: ``str.format`` template for one enum section.
        enum_value_template: ``str.format`` template for one enum member.
        enum_mdx_template: ``str.format`` template for the whole page.

    Returns:
        The rendered page, with one ``enum_item_template`` rendering per pair.
    """
    rendered_items = "".join(
        enum_item_template.format(
            enum_name=name,
            enum_values=build_enum_values(
                enum_class=cls,
                enum_value_template=enum_value_template,
            ),
        )
        for name, cls in referenced_enums
    )
    return enum_mdx_template.format(toolkit_name=toolkit_name, enum_items=rendered_items)


def build_enum_values(
    enum_class: type[Enum],
    enum_value_template: str = ENUM_VALUE,
) -> str:
    """Render every member of ``enum_class`` with ``enum_value_template``.

    Args:
        enum_class: Enum whose members are iterated in definition order.
        enum_value_template: ``str.format`` template receiving
            ``enum_option_name`` and ``enum_option_value``.

    Returns:
        The rendered members concatenated, each followed by a newline. An
        enum without members yields an empty string.
    """
    rendered_members = [
        enum_value_template.format(
            enum_option_name=member.name,
            enum_option_value=member.value,
        )
        for member in enum_class
    ]
    return "".join(f"{rendered}\n" for rendered in rendered_members)


def build_table_of_contents(
    tools: list[ToolDefinition],
    table_of_contents_item_template: str = TABLE_OF_CONTENTS_ITEM,
    table_of_contents_template: str = TABLE_OF_CONTENTS,
) -> str:
    """Render the table of contents listing every tool of a toolkit.

    Each entry shows the tool's cleaned fully qualified name and the first
    line of its description.

    Args:
        tools: Tool definitions to list, in output order.
        table_of_contents_item_template: ``str.format`` template for one entry.
        table_of_contents_template: ``str.format`` template wrapping all
            entries (receives them as ``tool_items``).

    Returns:
        The rendered table of contents.
    """
    entries = [
        table_of_contents_item_template.format(
            tool_fully_qualified_name=clean_fully_qualified_name(tool.fully_qualified_name),
            # Only the first line of a multi-line description is shown.
            description=tool.description.split("\n")[0],
        )
        for tool in tools
    ]
    return table_of_contents_template.format(tool_items="".join(entries))


def build_footer(
    toolkit_name: str,
    pip_package_name: str,
    authorization: ToolAuthRequirement | None,
    footer_template: str = TOOLKIT_FOOTER,
    oauth2_footer_template: str = TOOLKIT_FOOTER_OAUTH2,
    well_known_provider_config_template: str = WELL_KNOWN_PROVIDER_CONFIG,
    generic_provider_config_template: str = GENERIC_PROVIDER_CONFIG,
) -> str:
    """Render the page footer, with provider configuration for OAuth2 toolkits.

    The OAuth2 footer is used only when ``authorization`` is set, its
    ``provider_type`` equals ``"oauth2"`` and it has a ``provider_id``. In
    every other case the plain footer is rendered.

    Args:
        toolkit_name: Toolkit name rendered into the footer.
        pip_package_name: Package name rendered into the footer.
        authorization: Authorization requirement of the toolkit, if any.
        footer_template: Template for toolkits without OAuth2.
        oauth2_footer_template: Template for OAuth2 toolkits.
        well_known_provider_config_template: Provider-configuration template
            used when ``is_well_known_provider`` returns a truthy value.
        generic_provider_config_template: Provider-configuration template
            used otherwise.

    Returns:
        The rendered footer.
    """
    if (
        not authorization
        or authorization.provider_type != "oauth2"
        or not authorization.provider_id
    ):
        return footer_template.format(toolkit_name=toolkit_name, pip_package_name=pip_package_name)

    provider_id = authorization.provider_id
    if is_well_known_provider(provider_id=provider_id, auth_module=auth_module):
        config_template = well_known_provider_config_template
    else:
        config_template = generic_provider_config_template

    provider_configuration = config_template.format(
        toolkit_name=toolkit_name,
        provider_id=provider_id,
        provider_name=provider_id.capitalize(),
    )
    return oauth2_footer_template.format(
        pip_package_name=pip_package_name,
        provider_configuration=provider_configuration,
    )


def build_tools_specs(
    tools: list[ToolDefinition],
    docs_section: str,
    enums: dict[str, type[Enum]],
    tool_spec_template: str = TOOL_SPEC,
    tool_parameter_template: str = TOOL_PARAMETER,
    tool_spec_secrets_template: str = TOOL_SPEC_SECRETS,
) -> tuple[list[tuple[str, type[Enum]]], str]:
    """Render the spec sections of all tools and collect the enums they use.

    Args:
        tools: Tool definitions to render, in output order.
        docs_section: Docs section used to build links to the reference page.
        enums: Known enums by name, used to resolve enum-typed parameters.
        tool_spec_template: ``str.format`` template for one tool section.
        tool_parameter_template: ``str.format`` template for one parameter.
        tool_spec_secrets_template: ``str.format`` template for the secrets
            line of a tool.

    Returns:
        A ``(referenced_enums, tools_specs)`` tuple. ``referenced_enums`` holds
        one ``(enum_name, enum_class)`` pair per distinct enum name, in
        first-seen order. ``tools_specs`` is the concatenation of every tool's
        rendered spec.
    """
    spec_chunks: list[str] = []
    # Several tools (or several parameters of one tool) may use the same enum.
    # Keep a single entry per enum name so the reference page does not render
    # the same enum, under the same anchor, more than once. Dicts preserve
    # insertion order, so the first occurrence decides the position.
    unique_enums: dict[str, type[Enum]] = {}

    for tool in tools:
        tool_referenced_enums, tool_spec = build_tool_spec(
            tool=tool,
            docs_section=docs_section,
            enums=enums,
            tool_spec_template=tool_spec_template,
            tool_parameter_template=tool_parameter_template,
            tool_spec_secrets_template=tool_spec_secrets_template,
        )
        spec_chunks.append(tool_spec)
        for enum_name, enum_class in tool_referenced_enums:
            unique_enums.setdefault(enum_name, enum_class)

    return list(unique_enums.items()), "".join(spec_chunks)


def build_tool_spec(
    tool: ToolDefinition,
    docs_section: str,
    enums: dict[str, type[Enum]],
    tool_spec_template: str = TOOL_SPEC,
    tool_parameter_template: str = TOOL_PARAMETER,
    tool_spec_secrets_template: str = TOOL_SPEC_SECRETS,
) -> tuple[list[tuple[str, type[Enum]]], str]:
    """Render the spec section of a single tool.

    The section combines the tabbed example list, the first line of the tool
    description, the rendered parameters and, when the tool requires secrets,
    the rendered secrets line.

    Args:
        tool: Tool definition to render.
        docs_section: Docs section used to build links to the reference page.
        enums: Known enums by name, used to resolve enum-typed parameters.
        tool_spec_template: ``str.format`` template for the tool section.
        tool_parameter_template: ``str.format`` template for one parameter.
        tool_spec_secrets_template: ``str.format`` template for the secrets line.

    Returns:
        A ``(referenced_enums, tool_spec)`` tuple: the enums referenced by the
        tool's parameters and the rendered section.
    """
    toolkit_slug = tool.toolkit.name.lower()

    examples_tabs = TABBED_EXAMPLES_LIST.format(
        toolkit_name=toolkit_slug,
        tool_name=pascal_to_snake_case(tool.name),
    )
    referenced_enums, parameters = build_tool_parameters(
        tool_input=tool.input,
        docs_section=docs_section,
        toolkit_name=toolkit_slug,
        enums=enums,
        tool_parameter_template=tool_parameter_template,
    )

    # Tools without secret requirements get no secrets line at all.
    required_secrets = tool.requirements.secrets
    if required_secrets:
        secrets = build_tool_secrets(
            secrets=required_secrets,
            template=tool_spec_secrets_template,
        )
    else:
        secrets = ""

    qualified_name = clean_fully_qualified_name(tool.fully_qualified_name)
    # Only the first line of a multi-line description is shown.
    summary = tool.description.split("\n")[0]

    rendered_spec = tool_spec_template.format(
        tool_fully_qualified_name=qualified_name,
        tabbed_examples_list=examples_tabs,
        description=summary,
        parameters=parameters,
        secrets=secrets,
    )
    return referenced_enums, rendered_spec


def build_tool_secrets(
    secrets: list[ToolSecretRequirement],
    template: str = TOOL_SPEC_SECRETS,
) -> str:
    """Render the secrets line of a tool spec.

    Args:
        secrets: Secret requirements of the tool.
        template: ``str.format`` template receiving the formatted keys as
            ``secrets``.

    Returns:
        The rendered template, with every secret key wrapped in backticks and
        the keys separated by ``", "``. An empty string when ``secrets`` is
        empty.
    """
    if secrets:
        joined_keys = "`, `".join(requirement.key for requirement in secrets)
        # The separator supplies the inner backticks; add the outer pair here.
        return template.format(secrets=f"`{joined_keys}`")
    return ""


def build_tool_parameters(
    tool_input: ToolInput,
    docs_section: str,
    toolkit_name: str,
    enums: dict[str, type[Enum]],
    tool_parameter_template: str = TOOL_PARAMETER,
) -> tuple[list[tuple[str, type[Enum]]], str]:
    """Render the parameter list of a tool and collect the enums it references.

    A parameter whose schema has enum options is rendered as a link to the
    matching enum on the toolkit's reference page. Any other parameter is
    rendered with its value type, including the inner value type when the
    schema has one. Every definition is suffixed with ``", required"`` or
    ``", optional"``.

    Args:
        tool_input: Tool input whose ``parameters`` are rendered in order.
        docs_section: Docs section used in the reference-page link.
        toolkit_name: Toolkit name used in the reference-page link.
        enums: Known enums by name, searched via ``find_enum_by_options``.
        tool_parameter_template: ``str.format`` template for one parameter.

    Returns:
        A ``(referenced_enums, parameters)`` tuple: one ``(enum_name,
        enum_class)`` pair per enum-typed parameter, and the rendered
        parameters, each followed by a newline.
    """
    enums_found: list[tuple[str, type[Enum]]] = []
    rendered_rows: list[str] = []

    for param in tool_input.parameters:
        value_schema = param.value_schema

        if value_schema.enum:
            enum_name, enum_class = find_enum_by_options(enums, value_schema.enum)
            enums_found.append((enum_name, enum_class))
            type_label = f"`Enum` [{enum_name}](/toolkits/{docs_section}/{toolkit_name}/reference#{enum_name})"
        elif value_schema.inner_val_type:
            type_label = f"`{value_schema.val_type}[{value_schema.inner_val_type}]`"
        else:
            type_label = f"`{value_schema.val_type}`"

        requirement = "required" if param.required else "optional"
        rendered = tool_parameter_template.format(
            param_name=param.name,
            definition=f"{type_label}, {requirement}",
            description=param.description,
        )
        rendered_rows.append(rendered + "\n")

    return enums_found, "".join(rendered_rows)


def build_examples(
    print_debug: Callable,
    tools: list[ToolDefinition],
    openai_model: str,
) -> list[tuple[str, str]]:
    """Generate Python and JavaScript tool-call examples for every tool.

    For each tool a sample input map is generated with
    ``generate_tool_input_map`` (which calls the OpenAI API) and rendered into
    one Python and one JavaScript example.

    Args:
        print_debug: Callable invoked with a progress message for each tool.
        tools: Tool definitions to generate examples for.
        openai_model: Model name used to generate the sample input maps.

    Returns:
        ``(filename, content)`` pairs; for each tool the ``.py`` example comes
        first, followed by the ``.js`` example.
    """
    examples: list[tuple[str, str]] = []

    for tool in tools:
        print_debug(f"Generating tool-call examples for {tool.name}")
        sample_inputs = generate_tool_input_map(tool, openai_model)
        # Keep only the part of the qualified name before the first "@".
        qualified_name = tool.fully_qualified_name.partition("@")[0]
        file_stem = f"{pascal_to_snake_case(tool.name)}_example_call_tool"

        python_example = build_python_example(qualified_name, sample_inputs)
        examples.append((f"{file_stem}.py", python_example))

        javascript_example = build_javascript_example(qualified_name, sample_inputs)
        examples.append((f"{file_stem}.js", javascript_example))

    return examples


def build_python_example(
    tool_fully_qualified_name: str,
    input_map: dict[str, Any],
    template: str = TOOL_CALL_EXAMPLE_PY,
) -> str:
    """Render the Python tool-call example for a tool.

    The input map is pretty-printed as a Python literal and re-wrapped so the
    opening and closing braces sit on their own lines.

    Args:
        tool_fully_qualified_name: Tool name rendered into the example.
        input_map: Sample arguments for the tool call.
        template: ``str.format`` template receiving
            ``tool_fully_qualified_name`` and ``input_map``.

    Returns:
        The rendered example source.
    """
    formatted = pprint.pformat(
        input_map,
        indent=4,
        width=100,
        compact=False,
        sort_dicts=False,
    )

    # Remove exactly ONE outer brace pair. `str.lstrip`/`str.rstrip` take a
    # set of characters, so `rstrip("}")` would also eat the closing brace of
    # a nested dict at the end of the map and leave an unbalanced literal.
    if formatted.startswith("{") and formatted.endswith("}"):
        # pformat pads after the opening brace when it wraps; drop that padding.
        inner = formatted[1:-1].lstrip(" ")
    else:
        inner = formatted

    input_map_str = "{\n    " + inner + "\n}"
    return template.format(
        tool_fully_qualified_name=tool_fully_qualified_name,
        input_map=input_map_str,
    )


def build_javascript_example(
    tool_fully_qualified_name: str,
    input_map: dict,
    template: str = TOOL_CALL_EXAMPLE_JS,
) -> str:
    """Render the JavaScript tool-call example for a tool.

    Args:
        tool_fully_qualified_name: Tool name rendered into the example.
        input_map: Sample arguments for the tool call; serialized as JSON
            with a two-space indent and non-ASCII characters left unescaped.
        template: ``str.format`` template receiving
            ``tool_fully_qualified_name`` and ``input_map``.

    Returns:
        The rendered example source.
    """
    serialized_inputs = json.dumps(input_map, indent=2, ensure_ascii=False)
    rendered_example = template.format(
        tool_fully_qualified_name=tool_fully_qualified_name,
        input_map=serialized_inputs,
    )
    return rendered_example


def generate_toolkit_description(
    toolkit_name: str,
    tools: list[tuple[str, str]],
    openai_model: str,
) -> str:
    """Ask an OpenAI chat model to write a short description of a toolkit.

    The system prompt contains a worked example (the Asana toolkit). The user
    message carries the toolkit name and the JSON-encoded list of tools.

    Args:
        toolkit_name: Name of the toolkit to describe.
        tools: ``(tool_name, tool_description)`` pairs of the toolkit.
        openai_model: Model name passed to the chat-completions API.

    Returns:
        The content of the first completion choice, stripped of surrounding
        whitespace.
    """
    system_prompt = (
        "You are a helpful assistant. "
        "When given a toolkit name and a list of tools, you will generate a "
        "short, yet descriptive of the toolkit and the main actions a user "
        "or LLM can perform with it.\n\n"
        "As an example, here is the Asana toolkit description:\n\n"
        "The Arcade Asana toolkit provides a pre-built set of tools for "
        "interacting with Asana. These tools make it easy to build agents "
        "and AI apps that can:\n\n"
        "- Manage teams, projects, and workspaces.\n"
        "- Create, update, and search for tasks.\n"
        "- Retrieve data about tasks, projects, workspaces, users, etc.\n"
        "- Manage task attachments.\n\n"
        "And here is a JSON string with the list of tools in the Asana toolkit:\n\n"
        "```json\n\n"
        '[["AttachFileToTask", "Attaches a file to an Asana task\n\nProvide exactly '
        "one of file_content_str, file_content_base64, or file_content_url, never "
        "more\nthan one.\n\n- Use file_content_str for text files (will be encoded "
        "using file_encoding)\n- Use file_content_base64 for binary files like images, "
        'PDFs, etc.\n- Use file_content_url if the file is hosted on an external URL"], '
        '["CreateTag", "Create a tag in Asana"], ["CreateTask", "Creates a task in '
        "Asana\n\nThe task must be associated to at least one of the following: "
        "parent_task_id, project, or\nworkspace_id. If none of these are provided and "
        "the account has only one workspace, the task\nwill be associated to that "
        "workspace. If the account has multiple workspaces, an error will\nbe raised "
        'with a list of available workspaces."], ["GetProjectById", "Get an Asana '
        'project by its ID"], ["GetSubtasksFromATask", "Get the subtasks of a task"], '
        '["GetTagById", "Get an Asana tag by its ID"], ["GetTaskById", "Get a task by '
        'its ID"], ["GetTasksWithoutId", "Search for tasks"], ["GetTeamById", "Get an '
        'Asana team by its ID"], ["GetUserById", "Get a user by ID"], ["GetWorkspaceById", '
        '"Get an Asana workspace by its ID"], ["ListProjects", "List projects in Asana"], '
        '["ListTags", "List tags in an Asana workspace"], ["ListTeams", "List teams in '
        'an Asana workspace"], ["ListTeamsTheCurrentUserIsAMemberOf", "List teams in '
        'Asana that the current user is a member of"], ["ListUsers", "List users in '
        'Asana"], ["ListWorkspaces", "List workspaces in Asana that are visible to the '
        'authenticated user"], ["MarkTaskAsCompleted", "Mark a task in Asana as '
        'completed"], ["UpdateTask", "Updates a task in Asana"]]\n\n```\n\n'
        "Keep the description concise and to the point. The user will provide you with "
        "the toolkit name and the list of tools. Generate the description according to "
        "the instructions above."
    )
    user_prompt = (
        f"The toolkit name is {toolkit_name} and the list of tools is:\n\n"
        "```json\n\n"
        f"{json.dumps(tools, ensure_ascii=False)}\n\n"
        "```\n\n"
        "Please generate a description for the toolkit."
    )

    completion = openai.chat.completions.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,
        max_tokens=2048,
    )

    # NOTE(review): `cast` only informs the type checker. If the API returns
    # no content, the `.strip()` below raises AttributeError -- confirm
    # whether that case needs explicit handling.
    description = cast(str, completion.choices[0].message.content)
    return description.strip()


def generate_tool_input_map(
    tool: ToolDefinition,
    openai_model: str,
    retries: int = 0,
    max_retries: int = 3,
) -> dict[str, Any]:
    """Ask an OpenAI chat model for a sample input map for ``tool``.

    The model receives the tool interface built by
    ``build_tool_interface_signature`` and is instructed to answer with a
    single JSON object mapping argument names to sample values. A response
    that is empty, not valid JSON, or not a JSON object counts as a failed
    attempt and the request is repeated.

    Args:
        tool: Tool definition to generate sample arguments for.
        openai_model: Model name passed to the chat-completions API.
        retries: Number of retries already consumed before this call.
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If no attempt produced a JSON object. The message
            contains the text of the last response.
    """
    interface_signature = build_tool_interface_signature(tool)
    system_prompt = (
        "You are a helpful assistant expert in generating data for documenting "
        "sample scripts to calling tools. A tool is a function that is used in "
        "context of LLM tool-calling / function-calling.\n\n"
        "When given a tool signature with typed arguments, "
        "you must return exactly one JSON object (no markdown, no extra text) "
        "where each key is an argument name, and each value is a sample value "
        "for that argument that would make sense in a sample script to showcase "
        "human software engineers how the tool may be called. Generate the "
        "argument sample value based on its name and description\n\n"
        "Not every single argument must always be present in the input map. "
        "In some cases, the tool may require only one of two arguments to be "
        "provided, for example. In such cases, an indication will be present "
        "either/or in the tool description or the argument description. "
        "Always follow such instructions when present in the tool interface.\n\n"
        "Keep argument values as short as possible. Values don't have to always "
        "be valid. For instance, for file content base64-encoded arguments, "
        "you can use a short text or a placeholder like `[file_content]`, it is "
        "not necessary that the value is a valid base64-encoded string.\n\n"
        "Remember that you MUST RESPOND ONLY WITH A VALID JSON STRING, NO ADDED "
        "TEXT. Your response will be json.load'ed, so it must be a valid JSON "
        "string."
    )
    user_prompt = (
        "Here is a tool interface:\n\n"
        f"{interface_signature}\n\n"
        "Please provide a sample input map as a JSON object."
    )

    # Always make at least one attempt, then use up whatever retries remain.
    attempts = max(1, max_retries - retries + 1)
    text = ""
    last_error: json.JSONDecodeError | None = None

    for _ in range(attempts):
        response = openai.chat.completions.create(
            model=openai_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0,
            max_tokens=1024,
            stop=["\n\n"],
        )

        # A missing content is treated as an empty response, so it takes the
        # retry path instead of failing on `.strip()`.
        text = (response.choices[0].message.content or "").strip()

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as err:
            last_error = err
            continue

        if isinstance(parsed, dict):
            return cast(dict[str, Any], parsed)

        # Valid JSON but not an object (e.g. a list): not a usable input map.
        last_error = None

    raise ValueError(f"Failed to generate input map for tool {tool.name}: {text}") from last_error


def build_tool_interface_signature(tool: ToolDefinition) -> str:
    """Serialize a tool's name, description and arguments as a JSON string.

    Each argument is described by its name, description, required flag and
    value type. Arguments whose schema has enum options additionally carry an
    ``enum`` entry listing the accepted values.

    Args:
        tool: Tool definition to describe.

    Returns:
        A JSON document with the keys ``tool_name``, ``tool_description`` and
        ``tool_args``.
    """
    tool_args: list[dict[str, Any]] = [
        {
            "arg_name": param.name,
            "arg_description": param.description,
            "is_arg_required": param.required,
            "arg_type": param.value_schema.val_type,
            # The "enum" key is present only for enum-typed arguments.
            **(
                {"enum": {"accepted_values": param.value_schema.enum}}
                if param.value_schema.enum
                else {}
            ),
        }
        for param in tool.input.parameters
    ]

    signature = {
        "tool_name": tool.name,
        "tool_description": tool.description,
        "tool_args": tool_args,
    }
    return json.dumps(signature)