56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Literal
|
|
|
|
|
|
@dataclass
class ModelSettings:
    """Settings to use when calling an LLM.

    This class holds optional model configuration parameters (e.g. temperature,
    top_p, penalties, truncation, etc.).

    Not all models/providers support all of these parameters, so please check
    the API documentation for the specific model and provider you are using.
    """

    temperature: float | None = None
    """The temperature to use when calling the model."""

    top_p: float | None = None
    """The top_p to use when calling the model."""

    frequency_penalty: float | None = None
    """The frequency penalty to use when calling the model."""

    presence_penalty: float | None = None
    """The presence penalty to use when calling the model."""

    tool_choice: Literal["auto", "required", "none"] | str | None = None
    """The tool choice to use when calling the model."""

    parallel_tool_calls: bool | None = False
    """Whether to use parallel tool calls when calling the model."""

    truncation: Literal["auto", "disabled"] | None = None
    """The truncation strategy to use when calling the model."""

    max_tokens: int | None = None
    """The maximum number of output tokens to generate."""

    def resolve(self, override: ModelSettings | None) -> ModelSettings:
        """Produce a new ModelSettings by overlaying the override on this instance.

        For every field except ``parallel_tool_calls``, the override's value is
        used whenever it is not ``None``; otherwise this instance's value is
        kept. Falsy-but-meaningful values such as ``temperature=0.0`` or
        ``frequency_penalty=0`` therefore take effect instead of being
        silently replaced by the base value.

        Args:
            override: Settings to layer on top of this instance, or ``None``.

        Returns:
            ``self`` (the same object, not a copy) when ``override`` is
            ``None``; otherwise a newly constructed ``ModelSettings``.
        """
        if override is None:
            return self

        return ModelSettings(
            temperature=(
                self.temperature if override.temperature is None else override.temperature
            ),
            top_p=self.top_p if override.top_p is None else override.top_p,
            frequency_penalty=(
                self.frequency_penalty
                if override.frequency_penalty is None
                else override.frequency_penalty
            ),
            presence_penalty=(
                self.presence_penalty
                if override.presence_penalty is None
                else override.presence_penalty
            ),
            tool_choice=(
                self.tool_choice if override.tool_choice is None else override.tool_choice
            ),
            # NOTE: this field defaults to False rather than None, so an
            # override that never set it is indistinguishable from an explicit
            # False. A None-check here would let every default-constructed
            # override switch off a base True, so the truthiness merge is kept
            # on purpose: a True on either side wins.
            parallel_tool_calls=override.parallel_tool_calls or self.parallel_tool_calls,
            truncation=self.truncation if override.truncation is None else override.truncation,
            max_tokens=self.max_tokens if override.max_tokens is None else override.max_tokens,
        )
|