"""Tests for evaluation result formatters."""
import json
import pytest
from arcade_cli.formatters import (
FORMATTERS,
EvalResultFormatter,
HtmlFormatter,
JsonFormatter,
MarkdownFormatter,
TextFormatter,
get_formatter,
)
class MockEvaluation:
"""Mock EvaluationResult for testing."""
def __init__(
self,
passed: bool = True,
warning: bool = False,
score: float = 1.0,
failure_reason: str | None = None,
results: list[dict] | None = None,
):
self.passed = passed
self.warning = warning
self.score = score
self.failure_reason = failure_reason
self.results = results or []
def make_mock_results(
model: str = "gpt-4o",
cases: list[dict] | None = None,
suite_name: str = "test_eval_suite",
) -> list[list[dict]]:
"""Create mock evaluation results structure."""
if cases is None:
cases = [
{
"name": "test_case_1",
"input": "Test input 1",
"evaluation": MockEvaluation(passed=True, score=1.0),
},
{
"name": "test_case_2",
"input": "Test input 2",
"evaluation": MockEvaluation(passed=False, score=0.5),
},
]
return [[{"model": model, "suite_name": suite_name, "rubric": "Test Rubric", "cases": cases}]]
class TestGetFormatter:
    """Checks for ``get_formatter`` lookups and its error messages."""

    def test_get_text_formatter(self) -> None:
        """'txt' should resolve to a TextFormatter instance."""
        assert isinstance(get_formatter("txt"), TextFormatter)

    def test_get_markdown_formatter(self) -> None:
        """'md' should resolve to a MarkdownFormatter instance."""
        assert isinstance(get_formatter("md"), MarkdownFormatter)

    def test_case_insensitive(self) -> None:
        """Upper-case format names should resolve like their lower-case forms."""
        expectations = (("TXT", TextFormatter), ("MD", MarkdownFormatter))
        for spelling, expected_cls in expectations:
            assert isinstance(get_formatter(spelling), expected_cls)

    def test_invalid_format_raises_error(self) -> None:
        """An unknown format name should raise ValueError."""
        with pytest.raises(ValueError, match="Unsupported format"):
            get_formatter("invalid")

    def test_fuzzy_matching_suggests_close_match(self) -> None:
        """The error for 'txtt' should suggest 'txt'."""
        with pytest.raises(ValueError) as raised:
            get_formatter("txtt")
        message = str(raised.value)
        assert "Did you mean 'txt'?" in message

    def test_fuzzy_matching_suggests_html_for_htm(self) -> None:
        """The error for 'htm' should suggest 'html'."""
        with pytest.raises(ValueError) as raised:
            get_formatter("htm")
        message = str(raised.value)
        assert "Did you mean 'html'?" in message

    def test_no_suggestion_for_completely_different_format(self) -> None:
        """An unrelated name should get no suggestion, only the supported list."""
        with pytest.raises(ValueError) as raised:
            get_formatter("xyz123")
        message = str(raised.value)
        assert "Did you mean" not in message
        assert "Supported formats:" in message
class TestFormattersRegistry:
    """Checks on the contents of the ``FORMATTERS`` registry."""

    def test_registry_has_expected_formats(self) -> None:
        """The registry should have entries for 'txt' and 'md'."""
        for required_key in ("txt", "md"):
            assert required_key in FORMATTERS

    def test_registry_values_are_formatter_classes(self) -> None:
        """Every registered value should subclass EvalResultFormatter."""
        for name, registered_cls in FORMATTERS.items():
            is_formatter = issubclass(registered_cls, EvalResultFormatter)
            assert is_formatter, f"{name} is not a formatter"
class TestTextFormatter:
    """Checks on the plain-text output produced by ``TextFormatter``."""

    def test_file_extension(self) -> None:
        """The formatter should report 'txt' as its file extension."""
        assert TextFormatter().file_extension == "txt"

    def test_format_basic_results(self) -> None:
        """Default mock results should render model, per-case lines and summary."""
        rendered = TextFormatter().format(make_mock_results())

        expected_fragments = (
            "Model: gpt-4o",
            "PASSED test_case_1",
            "FAILED test_case_2",
            "Score: 100.00%",
            "Score: 50.00%",
            "Summary",
            "Total: 2",
            "Passed: 1",
            "Failed: 1",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_format_with_warnings(self) -> None:
        """A case flagged as a warning should be labelled and counted as one."""
        warned = {
            "name": "warned_case",
            "input": "Test",
            "evaluation": MockEvaluation(passed=False, warning=True, score=0.7),
        }
        rendered = TextFormatter().format(make_mock_results(cases=[warned]))

        assert "WARNED warned_case" in rendered
        assert "Warnings: 1" in rendered

    def test_format_with_details(self) -> None:
        """With show_details=True the input and per-field details should appear."""
        field_result = {
            "field": "param1",
            "match": True,
            "score": 0.5,
            "weight": 0.5,
            "expected": "expected_val",
            "actual": "actual_val",
            "is_criticized": True,
        }
        detailed = {
            "name": "detailed_case",
            "input": "Detailed test input",
            "evaluation": MockEvaluation(passed=True, score=0.9, results=[field_result]),
        }
        rendered = TextFormatter().format(
            make_mock_results(cases=[detailed]),
            show_details=True,
        )

        expected_fragments = (
            "User Input: Detailed test input",
            "Details:",
            "param1:",
            "Expected: expected_val",
            "Actual: actual_val",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_format_failed_only_with_original_counts(self) -> None:
        """In failed_only mode the summary should use the supplied original counts."""
        # The asserted totals suggest the tuple is (total, passed, failed, warned);
        # confirm the order against the formatter implementation.
        pre_filter_counts = (10, 8, 2, 0)
        rendered = TextFormatter().format(
            make_mock_results(),
            failed_only=True,
            original_counts=pre_filter_counts,
        )

        expected_fragments = (
            "Showing only 2 failed evaluation(s)",
            "Total: 10",
            "Passed: 8",
            "Failed: 2",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
class TestMarkdownFormatter:
"""Tests for MarkdownFormatter."""
def test_file_extension(self) -> None:
    """The formatter should report 'md' as its file extension."""
    assert MarkdownFormatter().file_extension == "md"
def test_format_has_markdown_structure(self) -> None:
    """Output should contain the expected headings and table syntax."""
    rendered = MarkdownFormatter().format(make_mock_results())

    # Section headings
    headings = (
        "# Evaluation Results",
        "## Summary",
        "## Results by Model",
        "### 🤖 gpt-4o",
    )
    for heading in headings:
        assert heading in rendered

    # Table column and header-separator markers
    for marker in ("|", "---"):
        assert marker in rendered
def test_format_summary_table(self) -> None:
    """The summary table should list total, passed and failed counts."""
    rendered = MarkdownFormatter().format(make_mock_results())

    expected_rows = (
        "| Metric | Count |",
        "| **Total** | 2 |",
        "| ✅ Passed | 1 |",
        "| ❌ Failed | 1 |",
    )
    for row in expected_rows:
        assert row in rendered
def test_format_results_table(self) -> None:
    """The per-model results table should have one row per case."""
    rendered = MarkdownFormatter().format(make_mock_results())

    expected_rows = (
        "| Status | Case | Score |",
        "| ✅ | test_case_1 | 100.0% |",
        "| ❌ | test_case_2 | 50.0% |",
    )
    for row in expected_rows:
        assert row in rendered
def test_format_with_warnings_emoji(self) -> None:
    """A case flagged as a warning should be rendered with the warning emoji."""
    warned = {
        "name": "warned_case",
        "input": "Test",
        "evaluation": MockEvaluation(passed=False, warning=True, score=0.7),
    }
    rendered = MarkdownFormatter().format(make_mock_results(cases=[warned]))

    assert "⚠️" in rendered
def test_format_with_details_collapsible(self) -> None:
"""Should use collapsible details section."""
cases = [
{
"name": "detailed_case",
"input": "Test input",
"evaluation": MockEvaluation(
passed=True,
score=0.9,
results=[
{
"field": "param1",
"match": True,
"score": 0.5,
"weight": 0.5,
"expected": "exp",
"actual": "act",
"is_criticized": True,
}
],
),
}
]
formatter = MarkdownFormatter()
output = formatter.format(make_mock_results(cases=cases), show_details=True)
assert "" in output
assert "