# arcade-mcp/libs/arcade-cli/arcade_cli/display.py

from typing import TYPE_CHECKING, Any

from arcade_core.schema import ToolDefinition
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.table import Table
from rich.text import Text

if TYPE_CHECKING:
    from arcade_evals.eval import EvaluationResult
# Shared Rich console through which every display helper in this module prints.
console = Console()


def display_tools_table(tools: list[ToolDefinition]) -> None:
    """
    Print a summary table with one row per tool: name, description, package, and version.

    Rows are ordered by toolkit name and only the first line of each description
    is shown. When ``tools`` is empty, a short notice is printed instead of a table.
    """
    if not tools:
        console.print("No tools found.", style="bold")
        return

    listing = Table(show_header=True, header_style="bold magenta")
    for heading in ("Name", "Description", "Package", "Version"):
        listing.add_column(heading)

    # Sort a copy so the caller's list keeps its original order.
    ordered = list(tools)
    ordered.sort(key=lambda entry: entry.toolkit.name)

    for entry in ordered:
        qualified_name = str(entry.get_fully_qualified_name())
        # Keep the table compact: only the text before the first newline is shown.
        first_line = entry.description.partition("\n")[0] if entry.description else ""
        listing.add_row(
            qualified_name,
            first_line,
            entry.toolkit.name,
            entry.toolkit.version,
        )

    console.print(f"Found {len(tools)} tools.")
    console.print(listing)


def display_tool_details(tool: ToolDefinition) -> None:
    """
    Print a tool's description, input parameters, and expected output as three stacked panels.

    Each panel falls back to a placeholder message when the corresponding
    information is missing on ``tool``.
    """
    panels = [
        Panel(
            tool.description or "No description available.",
            title=f"Tool: {tool.name}",
            border_style="cyan",
        )
    ]

    # --- Input parameters ---
    parameters = tool.input.parameters
    if not parameters:
        inputs_body = "No input parameters."
    else:
        inputs_body = Table(show_header=True, header_style="bold green")
        column_styles = (
            ("Name", "cyan"),
            ("Type", "magenta"),
            ("Required", "yellow"),
            ("Description", "white"),
            ("Default", "blue"),
        )
        for heading, colour in column_styles:
            inputs_body.add_column(heading, style=colour)

        for parameter in parameters:
            # Parameters carry no default value, so the last column shows the
            # allowed enum values when there are any and "N/A" otherwise.
            allowed_values = parameter.value_schema.enum
            last_column = f"One of {allowed_values}" if allowed_values else "N/A"
            inputs_body.add_row(
                parameter.name,
                parameter.value_schema.val_type,
                str(parameter.required),
                parameter.description or "",
                last_column,
            )
    panels.append(Panel(inputs_body, title="Input Parameters", border_style="green"))

    # --- Expected output ---
    expected = tool.output
    if not expected:
        output_body = "No output information available."
    else:
        description_text = expected.description or "No description available."
        modes_text = ", ".join(expected.available_modes)
        if expected.value_schema:
            value_type_text = expected.value_schema.val_type
        else:
            value_type_text = "N/A"
        output_body = Text.assemble(
            ("Description: ", "bold"),
            (description_text, ""),
            "\n",
            ("Available Modes: ", "bold"),
            (modes_text, ""),
            "\n",
            ("Value Type: ", "bold"),
            (value_type_text, ""),
        )
    panels.append(Panel(output_body, title="Expected Output", border_style="blue"))

    # Print only after every panel has been built, top to bottom.
    for panel in panels:
        console.print(panel)


def display_tool_messages(tool_messages: list[dict]) -> None:
    """
    Print a dimmed one-line trace for each tool call and tool result in ``tool_messages``.

    Messages with role ``assistant`` produce one line per entry in their
    ``tool_calls``; messages with role ``tool`` produce one line showing the
    returned content. Messages with any other role are skipped.

    Tool names, arguments, and returned content are escaped before being placed
    in the Rich markup string, so bracketed text in them (for example
    ``[link](url)`` or ``[/path]``) is printed literally rather than being
    interpreted as a style tag.

    Args:
        tool_messages: Chat messages as dicts. Every message needs a ``role``
            key. Tool-call entries are read through ``["function"]["name"]`` and
            ``["function"]["arguments"]``; tool results through ``name`` and
            ``content``.

    Raises:
        KeyError: If a message lacks one of the keys listed above.
    """
    for message in tool_messages:
        role = message["role"]
        if role == "assistant":
            # `.get("tool_calls", [])` would still return None when the key is
            # present with a None value, and iterating None raises TypeError.
            for tool_call in message.get("tool_calls") or []:
                function = tool_call["function"]
                called_name = escape(str(function["name"]))
                called_args = escape(str(function["arguments"]))
                console.print(
                    f"[bold]Called tool '{called_name}' with parameters:[/bold] {called_args}",
                    style="dim",
                )
        elif role == "tool":
            tool_name = escape(str(message["name"]))
            tool_output = escape(str(message["content"]))
            console.print(
                f"[bold]'{tool_name}' tool returned:[/bold] {tool_output}", style="dim"
            )


def display_eval_results(results: list[list[dict[str, Any]]], show_details: bool = False) -> None:
    """
    Print evaluation outcomes in a pytest-like layout, followed by an overall summary line.

    Args:
        results: One list per evaluation suite; each suite is a list of per-model
            dicts read through the keys ``model``, ``rubric``, and ``cases``.
            Every case is a dict with ``name``, ``input``, and ``evaluation``.
        show_details: When true, also print the rubric for each model and the
            user input plus per-critic details for each case.
    """
    tally = {"passed": 0, "warned": 0, "failed": 0}
    case_count = 0

    for suite in results:
        for per_model in suite:
            model_name = per_model.get("model", "Unknown Model")
            rubric_text = per_model.get("rubric", "Unknown Rubric")
            suite_cases = per_model.get("cases", [])
            case_count += len(suite_cases)

            console.print(f"[bold]Model:[/bold] [bold magenta]{model_name}[/bold magenta]")
            if show_details:
                console.print(f"[bold magenta]{rubric_text}[/bold magenta]")

            for entry in suite_cases:
                outcome = entry["evaluation"]

                # A pass takes precedence over a warning, which takes precedence over a failure.
                if outcome.passed:
                    badge = "[green]PASSED[/green]"
                    tally["passed"] += 1
                elif outcome.warning:
                    badge = "[yellow]WARNED[/yellow]"
                    tally["warned"] += 1
                else:
                    badge = "[red]FAILED[/red]"
                    tally["failed"] += 1

                # One-line summary per case, with the score shown as a percentage.
                console.print(f"{badge} {entry['name']} -- Score: {outcome.score * 100:.2f}%")

                if not show_details:
                    continue

                console.print(f"[bold]User Input:[/bold] {entry['input']}\n")
                console.print("[bold]Details:[/bold]")
                console.print(_format_evaluation(outcome))
                console.print("-" * 80)

    # Warnings and failures appear in the summary only when there are any.
    segments = [
        f"[bold]Summary -- [/bold]Total: {case_count}",
        f"[green]Passed: {tally['passed']}[/green]",
    ]
    if tally["warned"] > 0:
        segments.append(f"[yellow]Warnings: {tally['warned']}[/yellow]")
    if tally["failed"] > 0:
        segments.append(f"[red]Failed: {tally['failed']}[/red]")
    console.print(" -- ".join(segments) + "\n")


def _format_evaluation(evaluation: "EvaluationResult") -> str:
    """
    Build the Rich-markup detail text for a single evaluation.

    When the evaluation has a failure reason, only that reason is reported.
    Otherwise each entry in ``evaluation.results`` becomes one section, coloured
    green for a match, red for a mismatch, and yellow when the field was not
    criticized.

    Args:
        evaluation: An EvaluationResult object containing the evaluation results.

    Returns:
        The sections joined by newlines; an empty string when there is nothing to report.
    """
    if evaluation.failure_reason:
        return f"[bold red]Failure Reason:[/bold red] {evaluation.failure_reason}"

    sections = []
    for outcome in evaluation.results:
        # Entries without the flag are treated as criticized.
        criticized = outcome.get("is_criticized", True)
        if not criticized:
            colour = "yellow"
        elif outcome["match"]:
            colour = "green"
        else:
            colour = "red"

        label = outcome["field"]
        points = outcome["score"]
        max_points = outcome["weight"]
        wanted = outcome["expected"]
        got = outcome["actual"]

        if criticized:
            verdict = (
                f"[{colour}]Match: {outcome['match']}"
                f"\n     Score: {points:.2f}/{max_points:.2f}[/{colour}]"
            )
        else:
            verdict = f"[{colour}]Un-criticized[/{colour}]"

        comparison = f"\n     Expected: {wanted}\n     Actual: {got}"
        sections.append(f"[bold]{label}:[/bold] {verdict}{comparison}")

    return "\n".join(sections)


def display_arcade_chat_header(base_url: str, stream: bool) -> None:
    """
    Print the chat banner: a title line followed by the engine URL being used.

    Args:
        base_url: Address of the Arcade Engine, shown in bold blue.
        stream: When true, " (streaming)" is appended after the URL.
    """
    banner_parts: list[Any] = [
        "\n",
        ("=== Arcade Chat ===", "bold magenta underline"),
        "\n",
        "\n",
        "Chatting with Arcade Engine at ",
        (base_url, "bold blue"),
    ]
    if stream:
        # A bare string part is appended unstyled.
        banner_parts.append(" (streaming)")
    console.print(Text.assemble(*banner_parts))