From 7d9354b4b4e5791fc7736a65afdbf132da089de5 Mon Sep 17 00:00:00 2001 From: Sam Partee Date: Wed, 25 Sep 2024 07:35:25 -0700 Subject: [PATCH] Address alpha release tweaks and bugs (#62) # Address Alpha Release Tweaks and Bugs This PR addresses several issues and tweaks identified during the alpha release: - **Ensure `~/.arcade` directory exists before writing the config file** In `arcade/cli/authn.py`, added code to create the `~/.arcade` directory if it doesn't exist. This prevents errors when writing the configuration file during the login process. - **Fix retry logic in process management** In `arcade/cli/launcher.py`, corrected an off-by-one error in the retry logic within the `_manage_processes` function. This ensures that the process management behaves as expected when retries are exhausted. - **Allow passing environment variables to the engine process** (technically this option isn't exposed yet) Updated the `start_servers`, `_manage_processes`, and `_start_process` functions in `arcade/cli/launcher.py` to accept an `engine_env` parameter. This allows custom environment variables to be set for the engine process. Also, set `GIN_MODE` to `"release"` by default. - **Handle cases with no critics in evaluations** Modified the `EvalCase` class in `arcade/sdk/eval/eval.py` to handle scenarios where no critics are provided. This avoids potential errors during the evaluation process when critics are absent. Should add a test for this. - **Adjust dependencies in `pyproject.toml`** - Moved `uvicorn` to be an optional dependency and included it in the `fastapi` extra. - Removed unnecessary development dependencies (`mkdocs`, `mkdocs-material`, `mkdocstrings`). - Ensured that `uvicorn` is updated to version `^0.30.0`. --------- Co-authored-by: Nate Barbettini --- arcade/arcade/cli/authn.py | 5 +++++ arcade/arcade/cli/launcher.py | 28 +++++++++++++++++++------ arcade/arcade/cli/main.py | 3 ++- arcade/arcade/sdk/eval/eval.py | 37 +++++++++++++++++++++++++--------- arcade/pyproject.toml | 7 ++----- 5 files changed, 58 insertions(+), 22 deletions(-) diff --git a/arcade/arcade/cli/authn.py b/arcade/arcade/cli/authn.py index 68a27c87..201c5a27 100644 --- a/arcade/arcade/cli/authn.py +++ b/arcade/arcade/cli/authn.py @@ -55,6 +55,11 @@ class LoginCallbackHandler(BaseHTTPRequestHandler): ) return False + # ensure the ~/.arcade directory exists + # TODO: this should use WORK_DIR from env if set + if not os.path.exists(os.path.expanduser("~/.arcade")): + os.makedirs(os.path.expanduser("~/.arcade"), exist_ok=True) + # TODO don't overwrite existing config config_file_path = os.path.expanduser("~/.arcade/arcade.toml") new_config = {"api": {"key": api_key}, "user": {"email": email}} diff --git a/arcade/arcade/cli/launcher.py b/arcade/arcade/cli/launcher.py index 180a44a1..1e2f6bb3 100644 --- a/arcade/arcade/cli/launcher.py +++ b/arcade/arcade/cli/launcher.py @@ -21,6 +21,7 @@ def start_servers( host: str, port: int, engine_config: str | None, + engine_env: dict[str, str] | None = None, ) -> None: """ Start the actor and engine servers. @@ -42,7 +43,7 @@ def start_servers( engine_cmd = _build_engine_command(engine_config) # Start and manage the processes - _manage_processes(actor_cmd, engine_cmd) + _manage_processes(actor_cmd, engine_cmd, engine_env) def _validate_host(host: str) -> str: @@ -177,13 +178,16 @@ def _build_engine_command(engine_config: str) -> list[str]: return cmd -def _manage_processes(actor_cmd: list[str], engine_cmd: list[str]) -> None: +def _manage_processes( + actor_cmd: list[str], engine_cmd: list[str], engine_env: dict[str, str] | None = None +) -> None: """ Manages the lifecycle of the actor and engine processes. Args: actor_cmd: The command to start the actor server. engine_cmd: The command to start the engine. + engine_env: Environment variables to set for the engine. """ actor_process: subprocess.Popen | None = None engine_process: subprocess.Popen | None = None @@ -211,7 +215,7 @@ def _manage_processes(actor_cmd: list[str], engine_cmd: list[str]) -> None: # Start the engine console.print("Starting engine...", style="bold green") - engine_process = _start_process("Engine", engine_cmd) + engine_process = _start_process("Engine", engine_cmd, engine_env) # Monitor processes _monitor_processes(actor_process, engine_process) @@ -222,8 +226,8 @@ def _manage_processes(actor_cmd: list[str], engine_cmd: list[str]) -> None: f"Processes exited. Retry {retry_count} of {max_retries}.", style="bold yellow" ) - if retry_count > max_retries: - console.print(f"❌ Exiting after {retry_count - 1} retries", style="bold red") + if retry_count >= max_retries: + console.print(f"❌ Exiting after {max_retries} retries", style="bold red") terminate_processes(exit_program=True) break # Exit the loop @@ -243,13 +247,16 @@ def _manage_processes(actor_cmd: list[str], engine_cmd: list[str]) -> None: sys.exit(1) -def _start_process(name: str, cmd: list[str]) -> subprocess.Popen: +def _start_process( + name: str, cmd: list[str], env: dict[str, str] | None = None +) -> subprocess.Popen: """ Starts a subprocess and begins streaming its output. Args: name: Name of the process. cmd: Command to execute. + env: Environment variables to set for the process. Returns: The subprocess.Popen object. @@ -257,9 +264,17 @@ def _start_process(name: str, cmd: list[str]) -> subprocess.Popen: Raises: RuntimeError: If the process fails to start. """ + _env = os.environ.copy() + if env: + _env.update(env) + + # TODO temporary fix for GIN_MODE + _env["GIN_MODE"] = "release" + try: process = subprocess.Popen( # noqa: S603, RUF100 cmd, + env=_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, @@ -302,6 +317,7 @@ def _monitor_processes(actor_process: subprocess.Popen, engine_process: subproce actor_process: The actor subprocess. engine_process: The engine subprocess. """ + while True: actor_status = actor_process.poll() engine_status = engine_process.poll() diff --git a/arcade/arcade/cli/main.py b/arcade/arcade/cli/main.py index 4967faae..b8624ffc 100644 --- a/arcade/arcade/cli/main.py +++ b/arcade/arcade/cli/main.py @@ -529,8 +529,9 @@ def up( Start both the actor and engine servers. """ try: + # TODO: pass Engine env vars from here start_servers(host, port, engine_config) except Exception as e: error_message = f"❌ Failed to start servers: {escape(str(e))}" console.print(error_message, style="bold red") - raise typer.Exit(code=1) + typer.Exit(code=1) diff --git a/arcade/arcade/sdk/eval/eval.py b/arcade/arcade/sdk/eval/eval.py index 167c34c2..28cff3c4 100644 --- a/arcade/arcade/sdk/eval/eval.py +++ b/arcade/arcade/sdk/eval/eval.py @@ -162,12 +162,16 @@ class EvalCase: system_message: str user_message: str expected_tool_calls: list[ExpectedToolCall] - critics: list["Critic"] + critics: list["Critic"] | None = None additional_messages: list[dict[str, str]] = field(default_factory=list) rubric: EvalRubric = field(default_factory=EvalRubric) def __post_init__(self) -> None: - self._validate_critics() + if self.critics is not None: + self._validate_critics() + else: + # if no critics are provided, set to empty list + self.critics = [] def _validate_critics(self) -> None: """ @@ -176,6 +180,9 @@ class EvalCase: Raises: WeightError: If the sum of critic weights exceeds 1.0. """ + if not self.critics: + return + total_weight = sum(critic.weight for critic in self.critics) if total_weight > 1.0: raise WeightError(f"Sum of critic weights must not exceed 1.0, got {total_weight}") @@ -252,6 +259,15 @@ class EvalCase: evaluation_result.failure_reason = f"Tool selection mismatch. Expected tools: {expected_tools}, but got: {actual_tools}" return evaluation_result + # if no critics for tool call arguments, then return + # passing score as only tool selection and quantity is checked + if not self.critics or len(self.critics) == 0: + evaluation_result.score = 1.0 + evaluation_result.passed = True + evaluation_result.warning = False + # TODO passing reason should be added + return evaluation_result + # Create a cost matrix for the assignment problem cost_matrix = self._create_cost_matrix(actual_tool_calls) @@ -322,12 +338,13 @@ class EvalCase: if expected.name == actual_tool: score += self.rubric.tool_selection_weight - for critic in self.critics: - expected_value = expected.args.get(critic.critic_field) - actual_value = actual_args.get(critic.critic_field) - if expected_value is not None and actual_value is not None: - result = critic.evaluate(expected_value, actual_value) - score += result["score"] + if self.critics: + for critic in self.critics: + expected_value = expected.args.get(critic.critic_field) + actual_value = actual_args.get(critic.critic_field) + if expected_value is not None and actual_value is not None: + result = critic.evaluate(expected_value, actual_value) + score += result["score"] cost_matrix[i, j] = score return cost_matrix @@ -463,7 +480,7 @@ class EvalSuite: name: str, user_message: str, expected_tool_calls: list[tuple[Callable, dict[str, Any]]], - critics: list["Critic"], + critics: list["Critic"] | None = None, system_message: str | None = None, rubric: EvalRubric | None = None, additional_messages: list[dict[str, str]] | None = None, @@ -550,7 +567,7 @@ class EvalSuite: user_message=user_message, expected_tool_calls=expected, rubric=rubric or self.rubric, - critics=critics or last_case.critics.copy(), + critics=critics or (last_case.critics.copy() if last_case.critics else None), additional_messages=new_additional_messages, ) diff --git a/arcade/pyproject.toml b/arcade/pyproject.toml index 09e3d33f..66b59bff 100644 --- a/arcade/pyproject.toml +++ b/arcade/pyproject.toml @@ -23,12 +23,13 @@ openai = "^1.36.0" # TODO: relax to an earlier version that still has what we ne pyjwt = "^2.8.0" loguru = "^0.7.0" fastapi = {version = "^0.110.0", optional = true} +uvicorn = {version = "^0.30.0", optional = true} scipy = {version = "^1.14.0", optional = true} numpy = {version = "^2.0.0", optional = true} scikit-learn = {version = "^1.5.0", optional = true} [tool.poetry.extras] -fastapi = ["fastapi"] +fastapi = ["fastapi", "uvicorn"] evals = ["scipy", "numpy", "scikit-learn"] [tool.poetry.group.dev.dependencies] @@ -39,10 +40,6 @@ pre-commit = "^3.4.0" tox = "^4.11.1" pytest-asyncio = "^0.23.7" types-toml = "^0.10.8" -uvicorn = "^0.22.0" -mkdocs = ">=1.5.2" -mkdocs-material = ">=9.3.0" -mkdocstrings = {extras = ["python"], version = ">=0.23.1"} [tool.poetry.scripts]