Add max_tokens and documentation to model settings
This commit is contained in:
parent
96913b847f
commit
fde49cfcf7
3 changed files with 22 additions and 0 deletions
|
|
@@ -10,15 +10,34 @@ class ModelSettings:
|
|||
|
||||
This class holds optional model configuration parameters (e.g. temperature,
|
||||
top_p, penalties, truncation, etc.).
|
||||
|
||||
Not all models/providers support all of these parameters, so please check the API documentation
|
||||
for the specific model and provider you are using.
|
||||
"""
|
||||
|
||||
temperature: float | None = None
|
||||
"""The temperature to use when calling the model."""
|
||||
|
||||
top_p: float | None = None
|
||||
"""The top_p to use when calling the model."""
|
||||
|
||||
frequency_penalty: float | None = None
|
||||
"""The frequency penalty to use when calling the model."""
|
||||
|
||||
presence_penalty: float | None = None
|
||||
"""The presence penalty to use when calling the model."""
|
||||
|
||||
tool_choice: Literal["auto", "required", "none"] | str | None = None
|
||||
"""The tool choice to use when calling the model."""
|
||||
|
||||
parallel_tool_calls: bool | None = False
|
||||
"""Whether to use parallel tool calls when calling the model."""
|
||||
|
||||
truncation: Literal["auto", "disabled"] | None = None
|
||||
"""The truncation strategy to use when calling the model."""
|
||||
|
||||
max_tokens: int | None = None
|
||||
"""The maximum number of output tokens to generate."""
|
||||
|
||||
def resolve(self, override: ModelSettings | None) -> ModelSettings:
|
||||
"""Produce a new ModelSettings by overlaying any non-None values from the
|
||||
|
|
@@ -33,4 +52,5 @@ class ModelSettings:
|
|||
tool_choice=override.tool_choice or self.tool_choice,
|
||||
parallel_tool_calls=override.parallel_tool_calls or self.parallel_tool_calls,
|
||||
truncation=override.truncation or self.truncation,
|
||||
max_tokens=override.max_tokens or self.max_tokens,
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -503,6 +503,7 @@ class OpenAIChatCompletionsModel(Model):
|
|||
top_p=self._non_null_or_not_given(model_settings.top_p),
|
||||
frequency_penalty=self._non_null_or_not_given(model_settings.frequency_penalty),
|
||||
presence_penalty=self._non_null_or_not_given(model_settings.presence_penalty),
|
||||
max_tokens=self._non_null_or_not_given(model_settings.max_tokens),
|
||||
tool_choice=tool_choice,
|
||||
response_format=response_format,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
|
|
|
|||
|
|
@@ -235,6 +235,7 @@ class OpenAIResponsesModel(Model):
|
|||
temperature=self._non_null_or_not_given(model_settings.temperature),
|
||||
top_p=self._non_null_or_not_given(model_settings.top_p),
|
||||
truncation=self._non_null_or_not_given(model_settings.truncation),
|
||||
max_output_tokens=self._non_null_or_not_given(model_settings.max_tokens),
|
||||
tool_choice=tool_choice,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
stream=stream,
|
||||
|
|
|
|||
Loading…
Reference in a new issue