From 6532411d33cac399fe3e596a59e1eabb514af42e Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Sun, 8 Jun 2025 19:39:07 -0300 Subject: [PATCH] remove old model management code --- open_notebook/models/__init__.py | 77 ----- open_notebook/models/embedding_models.py | 104 ------- open_notebook/models/llms.py | 294 ------------------ open_notebook/models/speech_to_text_models.py | 61 ---- open_notebook/models/text_to_speech_models.py | 31 -- 5 files changed, 567 deletions(-) delete mode 100644 open_notebook/models/__init__.py delete mode 100644 open_notebook/models/embedding_models.py delete mode 100644 open_notebook/models/llms.py delete mode 100644 open_notebook/models/speech_to_text_models.py delete mode 100644 open_notebook/models/text_to_speech_models.py diff --git a/open_notebook/models/__init__.py b/open_notebook/models/__init__.py deleted file mode 100644 index c131abd..0000000 --- a/open_notebook/models/__init__.py +++ /dev/null @@ -1,77 +0,0 @@ -from typing import Dict, Type, Union - -from open_notebook.models.embedding_models import ( - EmbeddingModel, - GeminiEmbeddingModel, - OllamaEmbeddingModel, - OpenAIEmbeddingModel, - VertexEmbeddingModel, -) -from open_notebook.models.llms import ( - AnthropicLanguageModel, - GeminiLanguageModel, - GroqLanguageModel, - LanguageModel, - LiteLLMLanguageModel, - OllamaLanguageModel, - OpenAILanguageModel, - OpenRouterLanguageModel, - VertexAILanguageModel, - VertexAnthropicLanguageModel, - XAILanguageModel, -) -from open_notebook.models.speech_to_text_models import ( - GroqSpeechToTextModel, - OpenAISpeechToTextModel, - SpeechToTextModel, -) -from open_notebook.models.text_to_speech_models import ( - ElevenLabsTextToSpeechModel, - GeminiTextToSpeechModel, - OpenAITextToSpeechModel, - TextToSpeechModel, -) - -ModelType = Union[LanguageModel, EmbeddingModel, SpeechToTextModel, TextToSpeechModel] - - -ProviderMap = Dict[str, Type[ModelType]] - -MODEL_CLASS_MAP: Dict[str, ProviderMap] = { - "language": { - "ollama": OllamaLanguageModel, - "openrouter": OpenRouterLanguageModel, - "vertexai-anthropic": VertexAnthropicLanguageModel, - "litellm": LiteLLMLanguageModel, - "vertexai": VertexAILanguageModel, - "anthropic": AnthropicLanguageModel, - "openai": OpenAILanguageModel, - "gemini": GeminiLanguageModel, - "xai": XAILanguageModel, - "groq": GroqLanguageModel, - }, - "embedding": { - "openai": OpenAIEmbeddingModel, - "gemini": GeminiEmbeddingModel, - "vertexai": VertexEmbeddingModel, - "ollama": OllamaEmbeddingModel, - }, - "speech_to_text": { - "openai": OpenAISpeechToTextModel, - "groq": GroqSpeechToTextModel, - }, - "text_to_speech": { - "openai": OpenAITextToSpeechModel, - "elevenlabs": ElevenLabsTextToSpeechModel, - "gemini": GeminiTextToSpeechModel, - }, -} - -__all__ = [ - "MODEL_CLASS_MAP", - "EmbeddingModel", - "LanguageModel", - "SpeechToTextModel", - "TextToSpeechModel", - "ModelType", -] diff --git a/open_notebook/models/embedding_models.py b/open_notebook/models/embedding_models.py deleted file mode 100644 index 43119cf..0000000 --- a/open_notebook/models/embedding_models.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -Classes for supporting different embedding models -""" - -from __future__ import annotations - -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import List, Optional - -import requests - -# todo: add support for multiple embeddings (array) - - -@dataclass -class EmbeddingModel(ABC): - """ - Abstract base class for language models. - """ - - model_name: Optional[str] = None - - @abstractmethod - def embed(self, text: str) -> List[float]: - """ - Generates an embedding - """ - raise NotImplementedError - - -@dataclass -class OllamaEmbeddingModel(EmbeddingModel): - model_name: str - base_url: str = os.environ.get("OLLAMA_API_BASE", "http://localhost:11434") - - def embed(self, text: str) -> List[float]: - """ - Embeds the content using Open AI embedding - """ - text = text.replace("\n", " ") - response = requests.post( - f"{self.base_url}/api/embed", - json={"model": self.model_name, "input": [text]}, - ) - return response.json()["embeddings"][0] - - -@dataclass -class GeminiEmbeddingModel(EmbeddingModel): - model_name: str - - def embed(self, text: str) -> List[float]: - import google.generativeai as genai - - """ - Embeds the content using Open AI embedding - """ - model_name = ( - self.model_name - if self.model_name.startswith("models/") - else f"models/{self.model_name}" - ) - result = genai.embed_content(model=model_name, content=text) - - return result["embedding"] - - -@dataclass -class VertexEmbeddingModel(EmbeddingModel): - model_name: str - - def embed(self, text: str) -> List[float]: - from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel - - texts = [text] - # The dimensionality of the output embeddings. - # dimensionality = 256 - # The task type for embedding. Check the available tasks in the model's documentation. - model = TextEmbeddingModel.from_pretrained(self.model_name) - inputs = [TextEmbeddingInput(text) for text in texts] - embeddings = model.get_embeddings(inputs) - return embeddings[0].values - - -@dataclass -class OpenAIEmbeddingModel(EmbeddingModel): - model_name: str - - def embed(self, text: str) -> List[float]: - from openai import OpenAI - - """ - Embeds the content using Open AI embedding - """ - # todo: make this Singleton - client = OpenAI() - text = text.replace("\n", " ") - return ( - client.embeddings.create(input=[text], model=self.model_name) - .data[0] - .embedding - ) diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py deleted file mode 100644 index 2f70670..0000000 --- a/open_notebook/models/llms.py +++ /dev/null @@ -1,294 +0,0 @@ -""" -Classes for supporting different language models -""" - -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Dict, Optional - -from langchain_anthropic import ChatAnthropic -from langchain_community.chat_models import ChatLiteLLM -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_google_genai import ChatGoogleGenerativeAI -from langchain_google_vertexai import ChatVertexAI -from langchain_google_vertexai.model_garden import ChatAnthropicVertex -from langchain_groq.chat_models import ChatGroq -from langchain_ollama.chat_models import ChatOllama -from langchain_openai.chat_models import ChatOpenAI -from pydantic import SecretStr - -# future: is there a value on returning langchain specific models? - - -@dataclass -class LanguageModel(ABC): - """ - Abstract base class for language models. - """ - - model_name: Optional[str] = None - max_tokens: Optional[int] = 850 - temperature: Optional[float] = 1.0 - streaming: bool = True - top_p: Optional[float] = 0.9 - kwargs: Dict[str, Any] = field(default_factory=dict) - json: bool = False - - @abstractmethod - def to_langchain(self) -> BaseChatModel: - """ - Convert the language model to a LangChain chat model. - """ - raise NotImplementedError - - -@dataclass -class OllamaLanguageModel(LanguageModel): - """ - Language model that uses the Ollama chat model. - """ - - model_name: str - base_url: str = os.environ.get("OLLAMA_API_BASE", "http://localhost:11434") - max_tokens: Optional[int] = 650 - json: bool = False - - def to_langchain(self) -> ChatOllama: - """ - Convert the language model to a LangChain chat model. - """ - return ChatOllama( - # api_key="ollama", - model=self.model_name, - base_url=self.base_url, - # keep_alive="10m", - num_predict=self.max_tokens, - temperature=self.temperature or 0.5, - verbose=True, - top_p=self.top_p, - ) - - -@dataclass -class VertexAnthropicLanguageModel(LanguageModel): - """ - Language model that uses the Vertex Anthropic chat model. - """ - - model_name: str - project: Optional[str] = os.environ.get("VERTEX_PROJECT", "no-project") - location: Optional[str] = os.environ.get("VERTEX_LOCATION", "us-central1") - - def to_langchain(self) -> ChatAnthropicVertex: - """ - Convert the language model to a LangChain chat model. - """ - return ChatAnthropicVertex( - model=self.model_name, - project=self.project, - location=self.location, - max_tokens=self.max_tokens, - streaming=False, - kwargs=self.kwargs, - top_p=self.top_p, - temperature=self.temperature or 0.5, - ) - - -@dataclass -class LiteLLMLanguageModel(LanguageModel): - """ - Language model that uses the LiteLLM chat model. - """ - - model_name: str - - def to_langchain(self) -> ChatLiteLLM: - """ - Convert the language model to a LangChain chat model. - """ - return ChatLiteLLM( - model=self.model_name, - temperature=self.temperature or 0.5, - max_tokens=self.max_tokens, - streaming=self.streaming, - top_p=self.top_p, - ) - - -@dataclass -class VertexAILanguageModel(LanguageModel): - """ - Language model that uses the Vertex AI chat model. - """ - - model_name: str - project: Optional[str] = os.environ.get("VERTEX_PROJECT", "no-project") - location: Optional[str] = os.environ.get("VERTEX_LOCATION", "us-central1") - - def to_langchain(self) -> ChatVertexAI: - """ - Convert the language model to a LangChain chat model. - """ - return ChatVertexAI( - model=self.model_name, - streaming=self.streaming, - max_tokens=self.max_tokens, - top_p=self.top_p, - location=self.location, - project=self.project, - safety_settings=None, - temperature=self.temperature or 0.5, - ) - - -@dataclass -class GeminiLanguageModel(LanguageModel): - """ - Language model that uses the Gemini Family of chat models. - """ - - model_name: str - - def to_langchain(self) -> ChatGoogleGenerativeAI: - """ - Convert the language model to a LangChain chat model. - """ - return ChatGoogleGenerativeAI( - model=self.model_name, - max_tokens=self.max_tokens, - temperature=self.temperature or 0.5, - ) - - -@dataclass -class OpenRouterLanguageModel(LanguageModel): - """ - Language model that uses the OpenAI chat model. - """ - - model_name: str - - def to_langchain(self) -> ChatOpenAI: - """ - Convert the language model to a LangChain chat model for Open Router. - """ - kwargs = self.kwargs - if self.json: - kwargs["response_format"] = {"type": "json_object"} - - return ChatOpenAI( - model=self.model_name, - temperature=self.temperature or 0.5, - base_url=os.environ.get( - "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1" - ), - max_tokens=self.max_tokens, - model_kwargs=kwargs, - streaming=self.streaming, - api_key=SecretStr(os.environ.get("OPENROUTER_API_KEY", "openrouter")), - top_p=self.top_p, - ) - - -@dataclass -class GroqLanguageModel(LanguageModel): - """ - Language model that uses the Groq chat model. - """ - - model_name: str - - def to_langchain(self) -> ChatGroq: - """ - Convert the language model to a LangChain chat model for Groq. - """ - kwargs = self.kwargs - kwargs["top_p"] = self.top_p - - return ChatGroq( - model=self.model_name, - temperature=self.temperature or 0.5, - max_tokens=self.max_tokens, - model_kwargs=kwargs, - stop_sequences=None, - ) - - -@dataclass -class XAILanguageModel(LanguageModel): - """ - Language model that uses the OpenAI chat model for X.AI. - """ - - model_name: str - - def to_langchain(self) -> ChatOpenAI: - """ - Convert the language model to a LangChain chat model. - """ - kwargs = self.kwargs - if self.json: - kwargs["response_format"] = {"type": "json_object"} - - return ChatOpenAI( - model=self.model_name, - temperature=self.temperature or 0.5, - base_url=os.environ.get("XAI_BASE_URL", "https://api.x.ai/v1"), - max_tokens=self.max_tokens, - model_kwargs=kwargs, - streaming=self.streaming, - api_key=SecretStr(os.environ.get("XAI_API_KEY", "xai")), - top_p=self.top_p, - ) - - -@dataclass -class AnthropicLanguageModel(LanguageModel): - """ - Language model that uses the Anthropic chat model. - """ - - model_name: str - - def to_langchain(self) -> ChatAnthropic: - """ - Convert the language model to a LangChain chat model. - """ - return ChatAnthropic( # type: ignore[call-arg] - model_name=self.model_name, - max_tokens_to_sample=self.max_tokens or 850, - model_kwargs=self.kwargs, - streaming=False, - timeout=30, - top_p=self.top_p, - temperature=self.temperature or 0.5, - ) - - -@dataclass -class OpenAILanguageModel(LanguageModel): - """ - Language model that uses the OpenAI chat model. - """ - - model_name: str - - def to_langchain(self) -> ChatOpenAI: - """ - Convert the language model to a LangChain chat model. - """ - - kwargs = self.kwargs.copy() # Make a copy to avoid modifying the original - if self.json: - kwargs["response_format"] = {"type": "json_object"} - - return ChatOpenAI( - model=self.model_name, - temperature=self.temperature or 0.5, - streaming=self.streaming, - max_tokens=self.max_tokens, - top_p=self.top_p, - model_kwargs=kwargs, - ) diff --git a/open_notebook/models/speech_to_text_models.py b/open_notebook/models/speech_to_text_models.py deleted file mode 100644 index 113339b..0000000 --- a/open_notebook/models/speech_to_text_models.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -Classes for supporting different transcription models -""" - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class SpeechToTextModel(ABC): - """ - Abstract base class for speech to text models. - """ - - model_name: Optional[str] = None - - @abstractmethod - def transcribe(self, audio_file_path: str) -> str: - """ - Generates a text transcription from audio - """ - raise NotImplementedError - - -@dataclass -class OpenAISpeechToTextModel(SpeechToTextModel): - model_name: str - - def transcribe(self, audio_file_path: str) -> str: - """ - Transcribes an audio file into text - """ - from openai import OpenAI - - # todo: make this Singleton - client = OpenAI() - with open(audio_file_path, "rb") as audio: - transcription = client.audio.transcriptions.create( - model=self.model_name, file=audio - ) - return transcription.text - - -@dataclass -class GroqSpeechToTextModel(SpeechToTextModel): - model_name: str - - def transcribe(self, audio_file_path: str) -> str: - """ - Transcribes an audio file into text - """ - from groq import Groq - - # todo: make this Singleton - client = Groq() - with open(audio_file_path, "rb") as audio: - transcription = client.audio.transcriptions.create( - model=self.model_name, file=audio - ) - return transcription.text diff --git a/open_notebook/models/text_to_speech_models.py b/open_notebook/models/text_to_speech_models.py deleted file mode 100644 index 05c7357..0000000 --- a/open_notebook/models/text_to_speech_models.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Classes for supporting different text to speech models -""" - -from abc import ABC -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class TextToSpeechModel(ABC): - """ - Abstract base class for text to speech models. - """ - - model_name: Optional[str] = None - - -@dataclass -class OpenAITextToSpeechModel(TextToSpeechModel): - model_name: str - - -@dataclass -class ElevenLabsTextToSpeechModel(TextToSpeechModel): - model_name: str - - -@dataclass -class GeminiTextToSpeechModel(TextToSpeechModel): - model_name: str