Merge pull request #74 from lfnovo/esperanto

Esperanto
This commit is contained in:
Luis Novo 2025-06-10 13:56:39 -03:00 committed by GitHub
commit d0ca466e40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 1182 additions and 1479 deletions

View file

@ -11,9 +11,13 @@ google-credentials.json
docker-compose*
.docker_data/
docs/
surreal_data/
surreal-data/
notebook_data/
temp/
*.env
.mypy_cache/
.ruff_cache/
.pytest_cache
.ruff_cache
notebooks/

View file

@ -1,19 +1,26 @@
# OPENAI
OPENAI_API_KEY=
# OPENAI_API_BASE=
# OPENAI_API_KEY=
# ANTHROPIC
# ANTHROPIC_API_KEY=
# GEMINI
# this is the best model for long context and podcast generation
# GOOGLE_API_KEY=
# GEMINI_API_KEY=
# VERTEXAI
# VERTEX_PROJECT=my-google-cloud-project-name
# GOOGLE_APPLICATION_CREDENTIALS=./google-credentials.json
# VERTEX_LOCATION=us-east5
# MISTRAL
# MISTRAL_API_KEY=
# DEEPSEEK
# DEEPSEEK_API_KEY=
# OLLAMA
# OLLAMA_API_BASE="http://10.20.30.20:11434"
@ -30,8 +37,16 @@ OPENAI_API_KEY=
# ELEVENLABS
# Used only by the podcast feature
ELEVENLABS_API_KEY=
# ELEVENLABS_API_KEY=
# VOYAGE AI
# VOYAGE_API_KEY=
# AZURE OPENAI
# AZURE_OPENAI_API_KEY=
# AZURE_OPENAI_ENDPOINT=
# AZURE_OPENAI_API_VERSION="2024-12-01-preview"
# AZURE_OPENAI_DEPLOYMENT_NAME=
# USE THIS IF YOU WANT TO DEBUG THE APP ON LANGSMITH
# LANGCHAIN_TRACING_V2=true
@ -48,16 +63,6 @@ SURREAL_PASS="root"
SURREAL_NAMESPACE="open_notebook"
SURREAL_DATABASE="staging"
# This is used for the summarization feature when the content is too big to fit in a single context window
# It is measured in characters, not tokens.
SUMMARY_CHUNK_SIZE=200000
SUMMARY_CHUNK_OVERLAP=1000
# This is used for vector embeddings
# It is measured in characters, not tokens.
EMBEDDING_CHUNK_SIZE=1000
EMBEDDING_CHUNK_OVERLAP=50
# FIRECRAWL - Get a key at https://firecrawl.dev/
FIRECRAWL_API_KEY=

View file

@ -1,16 +1,16 @@
[server]
port = 8502
maxMessageSize = 500
fileWatcherType = "none"
[browser]
serverPort = 8502
# [theme]
[theme]
# # The preset Streamlit theme that your custom theme inherits from.
# # One of "light" or "dark".
# base =
base = "light"
# # Primary accent color for interactive elements.
# primaryColor =

View file

@ -18,6 +18,8 @@ WORKDIR /app
COPY . /app
RUN uv sync
EXPOSE 8502
RUN mkdir -p /app/data

View file

@ -105,6 +105,8 @@ Learn more about our project at [https://www.open-notebook.ai](https://www.open-
Go to the [Setup Guide](docs/SETUP.md) to learn how to set up the tool in detail.
📚 **Need help choosing AI models?** Check out our [Model Selection Guide](https://github.com/lfnovo/open-notebook/blob/main/docs/models.md) for recommended combinations and provider comparisons.
You don't need to clone this repo if you just want to use the app without building from source!
Take a look at the [Open Notebook Boilerplate](https://github.com/lfnovo/open-notebook-boilerplate) repo with a sample of how to set it up for maximum feature usability.
@ -166,6 +168,27 @@ Then run the Streamlit application:
uv run --env-file .env streamlit run app_home.py
```
## Provider Support Matrix
Thanks to the [Esperanto](https://github.com/lfnovo/esperanto) library, we support these providers out of the box!
| Provider | LLM Support | Embedding Support | Speech-to-Text | Text-to-Speech |
|--------------|-------------|------------------|----------------|----------------|
| OpenAI | ✅ | ✅ | ✅ | ✅ |
| Anthropic | ✅ | ❌ | ❌ | ❌ |
| Groq | ✅ | ❌ | ✅ | ❌ |
| Google (GenAI) | ✅ | ✅ | ❌ | ✅ |
| Vertex AI | ✅ | ✅ | ❌ | ✅ |
| Ollama | ✅ | ✅ | ❌ | ❌ |
| Perplexity | ✅ | ❌ | ❌ | ❌ |
| ElevenLabs | ❌ | ❌ | ✅ | ✅ |
| Azure OpenAI | ✅ | ❌ | ❌ | ❌ |
| Mistral | ✅ | ✅ | ❌ | ❌ |
| DeepSeek | ✅ | ❌ | ❌ | ❌ |
| Voyage | ❌ | ✅ | ❌ | ❌ |
| xAI | ✅ | ❌ | ❌ | ❌ |
| OpenRouter | ✅ | ❌ | ❌ | ❌ |
### Common Issues and Solutions
If you encounter a port already in use error:
@ -206,7 +229,7 @@ Go to the [Usage](docs/USAGE.md) page to learn how to use all features.
## Features
- **Multi-Notebook Support**: Organize your research across multiple notebooks effortlessly.
- **Multi-model support**: Open AI, Anthropic, Gemini, Vertex AI, Open Router, X.AI, Groq,Ollama.
- **Multi-model support**: Open AI, Anthropic, Gemini, Vertex AI, Open Router, X.AI, Groq, Ollama. ([Model Selection Guide](https://github.com/lfnovo/open-notebook/blob/main/docs/models.md))
- **Podcast Generator**: Automatically convert your notes into a podcast format.
- **Broad Content Integration**: Works with links, PDFs, EPUB, Office, TXT, Markdown files, YouTube videos, Audio files, Video files and pasted text.
- **Content Transformation**: Powerful customizable actions to summarize, extract insights, and more.
@ -226,7 +249,7 @@ Go to the [Usage](docs/USAGE.md) page to learn how to use all features.
Three intuitive columns to streamline your work:
1. **Sources**: Manage all research materials.
2. **Notes**: Create or AI-generate notes.
2. **Notes**: Write your own notes or have the AI generate them.
3. **Chat**: Chat with the AI, leveraging your content.
### ⚙️ Context Configuration

213
docs/models.md Normal file
View file

@ -0,0 +1,213 @@
# AI Model Selection Guide
This guide helps you choose the best AI models for your Open Notebook setup. We'll cover what makes each provider special, which models work best for different tasks, and give you ready-to-use combinations to get started quickly.
## Understanding Model Types
Open Notebook uses four types of AI models:
- **Language Models**: For chat, text generation, summaries, and tool calling
- **Embedding Models**: For semantic search and content similarity
- **Text-to-Speech (TTS)**: For generating podcasts and audio content
- **Speech-to-Text (STT)**: For transcribing audio files
## What to Consider When Choosing Models
**💰 Cost**: Some models are free (Ollama), others charge per token
**🎯 Quality**: Higher quality models often cost more but produce better results
**⚡ Speed**: Smaller models are faster but may be less capable
**🔧 Features**: Some models excel at specific tasks like tool calling or large contexts
---
## Provider Breakdown
### 🟦 Google (Gemini)
**Best for**: Large context processing, affordable high-quality models
**Language Models**
- `gemini-2.0-flash` - Excellent balance of price and performance with 1M context window
- `gemini-2.5-pro-preview-06-05` - Premium model for complex reasoning tasks
**Text-to-Speech**
- `gemini-2.5-flash-preview-tts` - Good quality at $10 per 1M tokens
- `gemini-2.5-pro-preview-tts` - Higher quality at $20 per 1M tokens
**Embedding**
- `text-embedding-004` - Solid performance with generous free tier
---
### 🟢 OpenAI
**Best for**: Reliable performance, excellent tool calling, wide ecosystem support
**Language Models**
- `gpt-4o-mini` - Great value for most tasks, perfect for everyday use
- `gpt-4o` - Premium quality with excellent tool calling capabilities
**Text-to-Speech**
- `tts-1` - Good quality for personal use and podcasts
**Speech-to-Text**
- `whisper-1` - Industry-standard transcription quality
**Embedding**
- `text-embedding-3-small` - Affordable at $0.02 per 1M tokens with solid performance
---
### 🎤 ElevenLabs
**Best for**: High-quality voice synthesis and transcription
**Text-to-Speech**
- `eleven_turbo_v2_5` - Excellent voice quality with reasonable pricing
**Speech-to-Text**
- `scribe_v1` - High-quality transcription service
---
### 🔵 DeepSeek
**Best for**: Cost-effective language models with good performance
**Language Models**
- `deepseek-chat` - Excellent quality-to-price ratio with 64k context window
---
### 🟡 Mistral
**Best for**: European-based alternative with competitive pricing
**Language Models**
- `mistral-medium-latest` - Good balance of quality and price
- `ministral-8b-latest` - Perfect for simple tasks like transformations
**Embedding**
- `mistral-embed` - Good quality, though not the most cost-effective
---
### ⚡ Grok (xAI)
**Best for**: Cutting-edge intelligence and reasoning
**Language Models**
- `grok-3` - Top-tier intelligence, premium pricing
- `grok-3-mini` - Excellent performance at more accessible pricing
---
### 🚢 Voyage AI
**Best for**: Specialized embedding models
**Embedding**
- `voyage-3.5-lite` - Competitive with OpenAI's offering at similar pricing
---
### 🟣 Anthropic (Claude)
**Best for**: High-quality reasoning and safety
**Language Models**
- `claude-3-5-sonnet-latest` - Exceptional quality for complex tasks
---
### 🦙 Ollama (Local/Free)
**Best for**: Privacy, offline use, and zero ongoing costs
**Language Models**
- `qwen3` - Excellent free alternative for most language tasks
- `gemma3` - Great for chat and simple transformations
- `phi4` - Compact but capable model
- `deepseek-r1` - Advanced reasoning capabilities
- `llama4` - Well-rounded performance
**Embedding**
- `mxbai-embed-large` - Outstanding free embedding model
---
## Recommended Combinations
### 🌟 Best Value (Mixed Providers)
Perfect balance of cost and performance
- **Chat**: `gpt-4o-mini` (OpenAI) - Reliable and affordable
- **Tools**: `gpt-4o` (OpenAI) - Excellent tool calling
- **Transformations**: `ministral-8b-latest` (Mistral) - Cost-effective
- **Large Context**: `gemini-2.0-flash` (Google) - 1M context window
- **Embedding**: `text-embedding-3-small` (OpenAI) - Good price/performance
- **TTS**: `gemini-2.5-flash-preview-tts` (Google) - Affordable quality
- **STT**: `whisper-1` (OpenAI) - Industry standard
### 💰 Budget-Friendly (Mostly Free)
Great for getting started or keeping costs low
- **Language**: `qwen3` (Ollama) - Free and capable
- **Tools**: `qwen3` (Ollama) - Handles basic tool calling
- **Transformations**: `gemma3` (Ollama) - Free and fast
- **Embedding**: `mxbai-embed-large` (Ollama) - Free, high quality
- **TTS**: `gpt-4o-mini-tts` (OpenAI) - Reasonable cost
- **STT**: `whisper-1` (OpenAI) - Best value
### 🚀 High Performance (Premium)
When quality is your top priority
- **Chat**: `claude-3-5-sonnet-latest` (Anthropic) or `grok-3` (xAI) - Exceptional reasoning
- **Tools**: `gpt-4o` (OpenAI) or `claude-3-5-sonnet-latest` (Anthropic) or `grok-3` (xAI) - Best tool calling
- **Transformations**: `grok-3-mini` (xAI) - Smart and efficient
- **Large Context**: `gemini-2.5-pro-preview-06-05` (Google) - Premium quality
- **Embedding**: `voyage-3.5-lite` (Voyage) - Specialized performance
- **TTS**: `eleven_turbo_v2_5` (ElevenLabs) - Premium voice quality
- **STT**: `whisper-1` (OpenAI) - Proven reliability
### 🏢 Single Provider (OpenAI)
Simplify billing and setup with one provider
- **Chat**: `gpt-4o-mini` - Everyday conversations
- **Tools**: `gpt-4o` - Complex operations
- **Transformations**: `gpt-4o-mini` - Cost-effective processing
- **Embedding**: `text-embedding-3-small` - Solid performance
- **TTS**: `gpt-4o-mini-tts` - Great quality
- **STT**: `whisper-1` - Industry standard
## Setting up Models
Here are the environment variables that you need to set up for each provider:
| Provider | Environment Variables |
|----------|----------------------|
| Mistral | `MISTRAL_API_KEY` |
| DeepSeek | `DEEPSEEK_API_KEY` |
| OpenAI | `OPENAI_API_KEY` |
| Google (Gemini) | `GEMINI_API_KEY` |
| X.AI | `XAI_API_KEY` |
| ElevenLabs | `ELEVENLABS_API_KEY` |
| Anthropic | `ANTHROPIC_API_KEY` |
| Ollama | `OLLAMA_BASE_URL` |
| Azure OpenAI | `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_API_VERSION`, `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME` |
| Groq | `GROQ_API_KEY` |
| Vertex AI | `VERTEX_PROJECT`, `GOOGLE_APPLICATION_CREDENTIALS`, `VERTEX_LOCATION` |
| Voyage AI | `VOYAGE_API_KEY` |
## Tips to use Text to Speech
### OpenAI
To use the OpenAI provider for audio:
1. Use "gpt-4o-mini-tts" as the audio model name
2. Pass the following environment variables:
- OPENAI_API_KEY=your-openai-api-key
### Gemini
To use the Gemini provider for audio:
1. Use "gemini-2.5-flash-preview-tts" as the audio model name
2. Pass the following environment variables:
- GEMINI_API_KEY=your-gemini-api-key
### Google Cloud / Vertex (previously supported as Google)
**If you were using this before, we recommend moving to GEMINI for better quality, price and ease of configuration.**
To use the Google Cloud (Vertex) provider for audio:
1. Use "default" as the audio model name (since vertex does not require a model name)
2. Pass the following environment variables:
- VERTEX_PROJECT=your-google-cloud-project-name
- GOOGLE_APPLICATION_CREDENTIALS=./google-credentials.json
- VERTEX_LOCATION=your-google-cloud-project-location
3. Set up the correct permissions in the [Google Cloud Console](https://github.com/souzatharsis/podcastfy/blob/main/usage/config.md)

1
migrations/6.surrealql Normal file
View file

@ -0,0 +1 @@
update model set provider='vertex' where provider='vertexai';

View file

@ -0,0 +1 @@
update model set provider='vertexai' where provider='vertex';

View file

@ -0,0 +1,3 @@
from dotenv import load_dotenv
load_dotenv()

View file

@ -24,6 +24,7 @@ class MigrationManager:
Migration.from_file("migrations/3.surrealql"),
Migration.from_file("migrations/4.surrealql"),
Migration.from_file("migrations/5.surrealql"),
Migration.from_file("migrations/6.surrealql"),
]
self.down_migrations = [
Migration.from_file(
@ -33,6 +34,7 @@ class MigrationManager:
Migration.from_file("migrations/3_down.surrealql"),
Migration.from_file("migrations/4_down.surrealql"),
Migration.from_file("migrations/5_down.surrealql"),
Migration.from_file("migrations/6_down.surrealql"),
]
self.runner = MigrationRunner(
up_migrations=self.up_migrations,

View file

@ -1,22 +1,8 @@
from datetime import datetime
from typing import (
Any,
ClassVar,
Dict,
List,
Optional,
Type,
TypeVar,
cast,
)
from typing import Any, ClassVar, Dict, List, Optional, Type, TypeVar, cast
from loguru import logger
from pydantic import (
BaseModel,
ValidationError,
field_validator,
model_validator,
)
from pydantic import BaseModel, ValidationError, field_validator, model_validator
from open_notebook.database.repository import (
repo_create,
@ -140,7 +126,7 @@ class ObjectModel(BaseModel):
"No embedding model found. Content will not be searchable."
)
data["embedding"] = (
EMBEDDING_MODEL.embed(embedding_content)
EMBEDDING_MODEL.embed([embedding_content])[0]
if EMBEDDING_MODEL
else []
)

View file

@ -1,16 +1,18 @@
from typing import ClassVar, Dict, Optional
from typing import ClassVar, Dict, Optional, Union
from open_notebook.database.repository import repo_query
from open_notebook.domain.base import ObjectModel, RecordModel
from open_notebook.models import (
MODEL_CLASS_MAP,
from esperanto import (
AIFactory,
EmbeddingModel,
LanguageModel,
ModelType,
SpeechToTextModel,
TextToSpeechModel,
)
from open_notebook.database.repository import repo_query
from open_notebook.domain.base import ObjectModel, RecordModel
ModelType = Union[LanguageModel, EmbeddingModel, SpeechToTextModel, TextToSpeechModel]
class Model(ObjectModel):
table_name: ClassVar[str] = "model"
@ -75,21 +77,38 @@ class ModelManager:
if not model:
raise ValueError(f"Model with ID {model_id} not found")
if not model.type or model.type not in MODEL_CLASS_MAP:
if not model.type or model.type not in [
"language",
"embedding",
"speech_to_text",
"text_to_speech",
]:
raise ValueError(f"Invalid model type: {model.type}")
provider_map = MODEL_CLASS_MAP[model.type]
if model.provider not in provider_map:
raise ValueError(
f"Provider {model.provider} not compatible with {model.type} models"
)
model_class = provider_map[model.provider]
model_instance = model_class(model_name=model.name, **kwargs)
# Special handling for language models that need langchain conversion
if model.type == "language":
model_instance = model_instance
model_instance: LanguageModel = AIFactory.create_language(
model_name=model.name,
provider=model.provider,
config=kwargs,
)
elif model.type == "embedding":
model_instance: EmbeddingModel = AIFactory.create_embedding(
model_name=model.name,
provider=model.provider,
config=kwargs,
)
elif model.type == "speech_to_text":
model_instance: SpeechToTextModel = AIFactory.create_speech_to_text(
model_name=model.name,
provider=model.provider,
config=kwargs,
)
elif model.type == "text_to_speech":
model_instance: TextToSpeechModel = AIFactory.create_text_to_speech(
model_name=model.name,
provider=model.provider,
config=kwargs,
)
self._model_cache[cache_key] = model_instance
return model_instance
@ -114,9 +133,9 @@ class ModelManager:
if not model_id:
return None
model = self.get_model(model_id, **kwargs)
assert model is None or isinstance(
model, SpeechToTextModel
), f"Expected SpeechToTextModel but got {type(model)}"
assert model is None or isinstance(model, SpeechToTextModel), (
f"Expected SpeechToTextModel but got {type(model)}"
)
return model
@property
@ -126,9 +145,9 @@ class ModelManager:
if not model_id:
return None
model = self.get_model(model_id, **kwargs)
assert model is None or isinstance(
model, TextToSpeechModel
), f"Expected TextToSpeechModel but got {type(model)}"
assert model is None or isinstance(model, TextToSpeechModel), (
f"Expected TextToSpeechModel but got {type(model)}"
)
return model
@property
@ -138,9 +157,9 @@ class ModelManager:
if not model_id:
return None
model = self.get_model(model_id, **kwargs)
assert model is None or isinstance(
model, EmbeddingModel
), f"Expected EmbeddingModel but got {type(model)}"
assert model is None or isinstance(model, EmbeddingModel), (
f"Expected EmbeddingModel but got {type(model)}"
)
return model
def get_default_model(self, model_type: str, **kwargs) -> Optional[ModelType]:

View file

@ -4,15 +4,10 @@ from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple
from loguru import logger
from pydantic import BaseModel, Field, field_validator
from open_notebook.database.repository import (
repo_query,
)
from open_notebook.database.repository import repo_query
from open_notebook.domain.base import ObjectModel
from open_notebook.domain.models import model_manager
from open_notebook.exceptions import (
DatabaseOperationError,
InvalidInputError,
)
from open_notebook.exceptions import DatabaseOperationError, InvalidInputError
from open_notebook.utils import split_text, surreal_clean
@ -212,7 +207,7 @@ class Source(ObjectModel):
idx, chunk = args
logger.debug(f"Processing chunk {idx}/{chunk_count}")
try:
embedding = EMBEDDING_MODEL.embed(chunk)
embedding = EMBEDDING_MODEL.embed([chunk])[0]
cleaned_content = surreal_clean(chunk)
logger.debug(f"Successfully processed chunk {idx}")
return (idx, embedding, cleaned_content)
@ -259,7 +254,7 @@ class Source(ObjectModel):
if not insight_type or not content:
raise InvalidInputError("Insight type and content must be provided")
try:
embedding = EMBEDDING_MODEL.embed(content) if EMBEDDING_MODEL else []
embedding = EMBEDDING_MODEL.embed([content])[0] if EMBEDDING_MODEL else []
return repo_query(
f"""
CREATE source_insight CONTENT {{
@ -351,7 +346,7 @@ def vector_search(
raise InvalidInputError("Search keyword cannot be empty")
try:
EMBEDDING_MODEL = model_manager.embedding_model
embed = EMBEDDING_MODEL.embed(keyword)
embed = EMBEDDING_MODEL.embed([keyword])[0]
results = repo_query(
"""
SELECT * FROM fn::vector_search($embed, $results, $source, $note, $minimum_score);

View file

@ -1,8 +1,8 @@
from esperanto import LanguageModel
from langchain_core.language_models.chat_models import BaseChatModel
from loguru import logger
from open_notebook.domain.models import model_manager
from open_notebook.models.llms import LanguageModel
from open_notebook.utils import token_count
@ -27,5 +27,6 @@ def provision_langchain_model(
else:
model = model_manager.get_default_model(default_type, **kwargs)
logger.debug(f"Using model: {model}")
assert isinstance(model, LanguageModel), f"Model is not a LanguageModel: {model}"
return model.to_langchain()

View file

@ -1,77 +0,0 @@
from typing import Dict, Type, Union
from open_notebook.models.embedding_models import (
EmbeddingModel,
GeminiEmbeddingModel,
OllamaEmbeddingModel,
OpenAIEmbeddingModel,
VertexEmbeddingModel,
)
from open_notebook.models.llms import (
AnthropicLanguageModel,
GeminiLanguageModel,
GroqLanguageModel,
LanguageModel,
LiteLLMLanguageModel,
OllamaLanguageModel,
OpenAILanguageModel,
OpenRouterLanguageModel,
VertexAILanguageModel,
VertexAnthropicLanguageModel,
XAILanguageModel,
)
from open_notebook.models.speech_to_text_models import (
GroqSpeechToTextModel,
OpenAISpeechToTextModel,
SpeechToTextModel,
)
from open_notebook.models.text_to_speech_models import (
ElevenLabsTextToSpeechModel,
GeminiTextToSpeechModel,
OpenAITextToSpeechModel,
TextToSpeechModel,
)
# Any of the four model families handled by this package.
ModelType = Union[LanguageModel, EmbeddingModel, SpeechToTextModel, TextToSpeechModel]
# Provider name -> class implementing that provider.
ProviderMap = Dict[str, Type[ModelType]]
# Registry keyed first by model type ("language", "embedding",
# "speech_to_text", "text_to_speech") and then by provider name.
MODEL_CLASS_MAP: Dict[str, ProviderMap] = {
    "language": {
        "ollama": OllamaLanguageModel,
        "openrouter": OpenRouterLanguageModel,
        "vertexai-anthropic": VertexAnthropicLanguageModel,
        "litellm": LiteLLMLanguageModel,
        "vertexai": VertexAILanguageModel,
        "anthropic": AnthropicLanguageModel,
        "openai": OpenAILanguageModel,
        "gemini": GeminiLanguageModel,
        "xai": XAILanguageModel,
        "groq": GroqLanguageModel,
    },
    "embedding": {
        "openai": OpenAIEmbeddingModel,
        "gemini": GeminiEmbeddingModel,
        "vertexai": VertexEmbeddingModel,
        "ollama": OllamaEmbeddingModel,
    },
    "speech_to_text": {
        "openai": OpenAISpeechToTextModel,
        "groq": GroqSpeechToTextModel,
    },
    "text_to_speech": {
        "openai": OpenAITextToSpeechModel,
        "elevenlabs": ElevenLabsTextToSpeechModel,
        "gemini": GeminiTextToSpeechModel,
    },
}
__all__ = [
"MODEL_CLASS_MAP",
"EmbeddingModel",
"LanguageModel",
"SpeechToTextModel",
"TextToSpeechModel",
"ModelType",
]

View file

@ -1,104 +0,0 @@
"""
Classes for supporting different embedding models
"""
from __future__ import annotations
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional
import requests
# todo: add support for multiple embeddings (array)
@dataclass
class EmbeddingModel(ABC):
    """
    Abstract base class for embedding models.

    Concrete subclasses implement `embed` for one provider; `model_name`
    carries the provider-specific model identifier.
    """

    model_name: Optional[str] = None

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """
        Generate the embedding vector for a single piece of text.
        """
        raise NotImplementedError
@dataclass
class OllamaEmbeddingModel(EmbeddingModel):
    """
    Embedding model served by an Ollama instance over HTTP.
    """

    model_name: str
    # Evaluated once, when the class is defined; later changes to
    # OLLAMA_API_BASE are not picked up.
    base_url: str = os.environ.get("OLLAMA_API_BASE", "http://localhost:11434")

    def embed(self, text: str) -> List[float]:
        """
        Embed the text through the Ollama `/api/embed` endpoint.

        Raises:
            requests.HTTPError: if Ollama answers with an error status.
        """
        # Newlines are flattened to spaces before the text is sent.
        text = text.replace("\n", " ")
        response = requests.post(
            f"{self.base_url}/api/embed",
            json={"model": self.model_name, "input": [text]},
        )
        # Surface the HTTP failure itself instead of a KeyError on the
        # missing "embeddings" key of an error payload.
        response.raise_for_status()
        return response.json()["embeddings"][0]
@dataclass
class GeminiEmbeddingModel(EmbeddingModel):
    """
    Embedding model backed by the Google Generative AI SDK.
    """

    model_name: str

    def embed(self, text: str) -> List[float]:
        """
        Embed the text with `genai.embed_content`.
        """
        # Imported at call time, as in the original implementation.
        import google.generativeai as genai

        # The model is always passed with a "models/" prefix; add it when
        # the configured name does not already carry one.
        model_name = (
            self.model_name
            if self.model_name.startswith("models/")
            else f"models/{self.model_name}"
        )
        result = genai.embed_content(model=model_name, content=text)
        return result["embedding"]
@dataclass
class VertexEmbeddingModel(EmbeddingModel):
    """
    Embedding model backed by Vertex AI text embedding models.
    """

    model_name: str

    def embed(self, text: str) -> List[float]:
        """
        Embed the text with a Vertex AI `TextEmbeddingModel`.
        """
        from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel

        embedder = TextEmbeddingModel.from_pretrained(self.model_name)
        # The API works on batches: send a batch of one and unwrap it.
        batch = embedder.get_embeddings([TextEmbeddingInput(text)])
        return batch[0].values
@dataclass
class OpenAIEmbeddingModel(EmbeddingModel):
    """
    Embedding model backed by the OpenAI embeddings API.
    """

    model_name: str

    def embed(self, text: str) -> List[float]:
        """
        Embed the text with the OpenAI embeddings API.
        """
        # Imported at call time, as in the original implementation.
        from openai import OpenAI

        # todo: make this Singleton
        client = OpenAI()
        # Newlines are flattened to spaces before the text is sent.
        text = text.replace("\n", " ")
        response = client.embeddings.create(input=[text], model=self.model_name)
        return response.data[0].embedding

View file

@ -1,294 +0,0 @@
"""
Classes for supporting different language models
"""
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_vertexai import ChatVertexAI
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
from langchain_groq.chat_models import ChatGroq
from langchain_ollama.chat_models import ChatOllama
from langchain_openai.chat_models import ChatOpenAI
from pydantic import SecretStr
# future: is there a value on returning langchain specific models?
@dataclass
class LanguageModel(ABC):
    """
    Abstract base class for language models.

    Holds the generation settings shared by the provider subclasses, which
    turn them into a concrete LangChain chat model via `to_langchain`.
    """

    model_name: Optional[str] = None
    max_tokens: Optional[int] = 850
    temperature: Optional[float] = 1.0
    streaming: bool = True
    top_p: Optional[float] = 0.9
    # Extra arguments a subclass may forward to its chat model.
    kwargs: Dict[str, Any] = field(default_factory=dict)
    json: bool = False

    @abstractmethod
    def to_langchain(self) -> BaseChatModel:
        """
        Build the LangChain chat model described by this configuration.
        """
        raise NotImplementedError
@dataclass
class OllamaLanguageModel(LanguageModel):
    """
    Language model that uses the Ollama chat model.
    """

    model_name: str
    # Evaluated once, when the class is defined; later changes to
    # OLLAMA_API_BASE are not picked up.
    base_url: str = os.environ.get("OLLAMA_API_BASE", "http://localhost:11434")
    max_tokens: Optional[int] = 650
    json: bool = False

    def to_langchain(self) -> ChatOllama:
        """
        Convert the language model to a LangChain chat model.
        """
        return ChatOllama(
            model=self.model_name,
            base_url=self.base_url,
            num_predict=self.max_tokens,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            verbose=True,
            top_p=self.top_p,
        )
@dataclass
class VertexAnthropicLanguageModel(LanguageModel):
    """
    Language model that uses the Vertex Anthropic chat model.
    """

    model_name: str
    # Both defaults are read from the environment once, at class definition.
    project: Optional[str] = os.environ.get("VERTEX_PROJECT", "no-project")
    location: Optional[str] = os.environ.get("VERTEX_LOCATION", "us-central1")

    def to_langchain(self) -> ChatAnthropicVertex:
        """
        Convert the language model to a LangChain chat model.

        Streaming is always disabled here, regardless of `self.streaming`.
        """
        return ChatAnthropicVertex(
            model=self.model_name,
            project=self.project,
            location=self.location,
            max_tokens=self.max_tokens,
            streaming=False,
            kwargs=self.kwargs,
            top_p=self.top_p,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
        )
@dataclass
class LiteLLMLanguageModel(LanguageModel):
    """
    Language model that uses the LiteLLM chat model.
    """

    model_name: str

    def to_langchain(self) -> ChatLiteLLM:
        """
        Convert the language model to a LangChain chat model.
        """
        return ChatLiteLLM(
            model=self.model_name,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            max_tokens=self.max_tokens,
            streaming=self.streaming,
            top_p=self.top_p,
        )
@dataclass
class VertexAILanguageModel(LanguageModel):
    """
    Language model that uses the Vertex AI chat model.
    """

    model_name: str
    # Both defaults are read from the environment once, at class definition.
    project: Optional[str] = os.environ.get("VERTEX_PROJECT", "no-project")
    location: Optional[str] = os.environ.get("VERTEX_LOCATION", "us-central1")

    def to_langchain(self) -> ChatVertexAI:
        """
        Convert the language model to a LangChain chat model.
        """
        return ChatVertexAI(
            model=self.model_name,
            streaming=self.streaming,
            max_tokens=self.max_tokens,
            top_p=self.top_p,
            location=self.location,
            project=self.project,
            safety_settings=None,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
        )
@dataclass
class GeminiLanguageModel(LanguageModel):
    """
    Language model that uses the Gemini Family of chat models.
    """

    model_name: str

    def to_langchain(self) -> ChatGoogleGenerativeAI:
        """
        Convert the language model to a LangChain chat model.
        """
        return ChatGoogleGenerativeAI(
            model=self.model_name,
            max_tokens=self.max_tokens,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
        )
@dataclass
class OpenRouterLanguageModel(LanguageModel):
    """
    Language model that reaches OpenRouter through the OpenAI chat model.
    """

    model_name: str

    def to_langchain(self) -> ChatOpenAI:
        """
        Convert the language model to a LangChain chat model for Open Router.

        The base URL and API key are read from OPENROUTER_BASE_URL and
        OPENROUTER_API_KEY on every call.
        """
        # Work on a copy so the JSON flag does not leak into self.kwargs.
        kwargs = self.kwargs.copy()
        if self.json:
            kwargs["response_format"] = {"type": "json_object"}
        return ChatOpenAI(
            model=self.model_name,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            base_url=os.environ.get(
                "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"
            ),
            max_tokens=self.max_tokens,
            model_kwargs=kwargs,
            streaming=self.streaming,
            api_key=SecretStr(os.environ.get("OPENROUTER_API_KEY", "openrouter")),
            top_p=self.top_p,
        )
@dataclass
class GroqLanguageModel(LanguageModel):
    """
    Language model that uses the Groq chat model.
    """

    model_name: str

    def to_langchain(self) -> ChatGroq:
        """
        Convert the language model to a LangChain chat model for Groq.

        `top_p` is handed over through `model_kwargs`.
        """
        # Build a fresh dict so top_p is not written back into self.kwargs.
        kwargs = {**self.kwargs, "top_p": self.top_p}
        return ChatGroq(
            model=self.model_name,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            max_tokens=self.max_tokens,
            model_kwargs=kwargs,
            stop_sequences=None,
        )
@dataclass
class XAILanguageModel(LanguageModel):
    """
    Language model that uses the OpenAI chat model for X.AI.
    """

    model_name: str

    def to_langchain(self) -> ChatOpenAI:
        """
        Convert the language model to a LangChain chat model.

        The base URL and API key are read from XAI_BASE_URL and XAI_API_KEY
        on every call.
        """
        # Work on a copy so the JSON flag does not leak into self.kwargs.
        kwargs = self.kwargs.copy()
        if self.json:
            kwargs["response_format"] = {"type": "json_object"}
        return ChatOpenAI(
            model=self.model_name,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            base_url=os.environ.get("XAI_BASE_URL", "https://api.x.ai/v1"),
            max_tokens=self.max_tokens,
            model_kwargs=kwargs,
            streaming=self.streaming,
            api_key=SecretStr(os.environ.get("XAI_API_KEY", "xai")),
            top_p=self.top_p,
        )
@dataclass
class AnthropicLanguageModel(LanguageModel):
    """
    Language model that uses the Anthropic chat model.
    """

    model_name: str

    def to_langchain(self) -> ChatAnthropic:
        """
        Convert the language model to a LangChain chat model.

        Streaming is always disabled and the timeout is fixed at 30.
        """
        return ChatAnthropic(  # type: ignore[call-arg]
            model_name=self.model_name,
            max_tokens_to_sample=self.max_tokens or 850,
            model_kwargs=self.kwargs,
            streaming=False,
            timeout=30,
            top_p=self.top_p,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
        )
@dataclass
class OpenAILanguageModel(LanguageModel):
    """
    Language model that uses the OpenAI chat model.
    """

    model_name: str

    def to_langchain(self) -> ChatOpenAI:
        """
        Convert the language model to a LangChain chat model.
        """
        kwargs = self.kwargs.copy()  # Make a copy to avoid modifying the original
        if self.json:
            kwargs["response_format"] = {"type": "json_object"}
        return ChatOpenAI(
            model=self.model_name,
            # Compare against None so an explicit temperature of 0 is kept
            # instead of being replaced by the 0.5 fallback.
            temperature=self.temperature if self.temperature is not None else 0.5,
            streaming=self.streaming,
            max_tokens=self.max_tokens,
            top_p=self.top_p,
            model_kwargs=kwargs,
        )

View file

@ -1,61 +0,0 @@
"""
Classes for supporting different transcription models
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
@dataclass
class SpeechToTextModel(ABC):
    """
    Abstract base class for speech to text models.

    Concrete subclasses implement `transcribe` for one provider;
    `model_name` carries the provider-specific model identifier.
    """

    model_name: Optional[str] = None

    @abstractmethod
    def transcribe(self, audio_file_path: str) -> str:
        """
        Produce the text transcription of the audio file at the given path.
        """
        raise NotImplementedError
@dataclass
class OpenAISpeechToTextModel(SpeechToTextModel):
    """
    Speech to text model backed by the OpenAI transcription API.
    """

    model_name: str

    def transcribe(self, audio_file_path: str) -> str:
        """
        Transcribes an audio file into text
        """
        from openai import OpenAI

        # todo: make this Singleton
        openai_client = OpenAI()
        # The file is opened in binary mode and closed once the call returns.
        with open(audio_file_path, "rb") as audio_stream:
            result = openai_client.audio.transcriptions.create(
                model=self.model_name, file=audio_stream
            )
            return result.text
@dataclass
class GroqSpeechToTextModel(SpeechToTextModel):
    """
    Speech to text model backed by the Groq transcription API.
    """

    model_name: str

    def transcribe(self, audio_file_path: str) -> str:
        """
        Transcribes an audio file into text
        """
        from groq import Groq

        # todo: make this Singleton
        groq_client = Groq()
        # The file is opened in binary mode and closed once the call returns.
        with open(audio_file_path, "rb") as audio_stream:
            result = groq_client.audio.transcriptions.create(
                model=self.model_name, file=audio_stream
            )
            return result.text

View file

@ -1,31 +0,0 @@
"""
Classes for supporting different text to speech models
"""
from abc import ABC
from dataclasses import dataclass
from typing import Optional
@dataclass
class TextToSpeechModel(ABC):
    """
    Abstract base class for text to speech models.

    Declares no abstract methods; it only carries the model name.
    """

    model_name: Optional[str] = None
@dataclass
class OpenAITextToSpeechModel(TextToSpeechModel):
    """
    Text to speech model configuration for the OpenAI provider.
    """

    # Required here, unlike the optional field on the base class.
    model_name: str
@dataclass
class ElevenLabsTextToSpeechModel(TextToSpeechModel):
    """
    Text to speech model configuration for the ElevenLabs provider.
    """

    # Required here, unlike the optional field on the base class.
    model_name: str
@dataclass
class GeminiTextToSpeechModel(TextToSpeechModel):
    """
    Text to speech model configuration for the Gemini provider.
    """

    # Required here, unlike the optional field on the base class.
    model_name: str

View file

@ -111,35 +111,41 @@ class PodcastConfig(ObjectModel):
api_key_label = "GOOGLE_API_KEY"
llm_model_name = self.transcript_model
if self.provider == "gemini":
tts_model = "geminimulti"
if self.provider == "google":
tts_model = "gemini"
elif self.provider == "openai":
tts_model = "openai"
elif self.provider == "anthropic":
tts_model = "anthropic"
elif self.provider == "vertexai":
tts_model = "geminimulti"
elif self.provider == "elevenlabs":
tts_model = "elevenlabs"
logger.debug(
logger.info(
f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}, tts model {tts_model}"
)
audio_file = generate_podcast(
conversation_config=conversation_config,
text=text,
tts_model=tts_model,
llm_model_name=llm_model_name,
api_key_label=api_key_label,
longform=longform,
)
episode = PodcastEpisode(
name=episode_name,
template=self.name,
instructions=instructions,
text=str(text),
audio_file=audio_file,
)
episode.save()
try:
audio_file = generate_podcast(
conversation_config=conversation_config,
text=text,
tts_model=tts_model,
llm_model_name=llm_model_name,
api_key_label=api_key_label,
longform=longform,
)
episode = PodcastEpisode(
name=episode_name,
template=self.name,
instructions=instructions,
text=str(text),
audio_file=audio_file,
)
episode.save()
except Exception as e:
logger.error(f"Failed to generate episode {episode_name}: {e}")
raise
@field_validator(
"name", "podcast_name", "podcast_tagline", "output_language", "model"

View file

@ -15,23 +15,43 @@ suggested_models:
openai:
language:
- gpt-4o-mini
- gpt-4o
embedding:
- text-embedding-3-small
text_to_speech:
- tts-1-hd
speech_to_text:
- whisper-1
gemini:
google:
language:
- gemini-1.5-flash
- gemini-2.0-flash
- gemini-2.5-pro-preview-06-05
text_to_speech:
- default
- gemini-2.5-flash-preview-tts
xai:
language:
- grok-beta
anthropic:
language:
- claude-3-5-sonnet-20241022
- claude-3-5-sonnet-latest
elevenlabs:
text_to_speech:
- eleven_turbo_v2_5
- eleven_turbo_v2_5
xai:
language:
- grok-3
- grok-3-mini
ollama:
language:
- qwen:14b
embedding:
- mxbai-embed-large
deepseek:
language:
- deepseek-chat
mistral:
language:
- mistral-large-latest
voyage:
embedding:
- voyage-3.5-lite

View file

@ -255,6 +255,14 @@ with templates_tab:
key=f"transcript_model_{pd_config.id}",
)
# Cleanup provider_models to only include specified providers
# filtered_provider_models = {
# k: v
# for k, v in provider_models.items()
# if k in ["openai", "vertex", "elevenlabs"]
# }
# provider_models = filtered_provider_models
pd_config.provider = st.selectbox(
"Audio Model Provider",
list(provider_models.keys()),
@ -271,9 +279,6 @@ with templates_tab:
index=index,
key=f"model_{pd_config.id}",
)
st.caption(
"OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5"
)
pd_config.voice1 = st.text_input(
"Voice 1",
value=pd_config.voice1,
@ -282,7 +287,7 @@ with templates_tab:
)
st.caption("Voice names are case sensitive. Be sure to add the exact name.")
st.markdown(
"Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
"Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Elevenlabs](https://elevenlabs.io/text-to-speech), [Gemini](https://ai.google.dev/gemini-api/docs/speech-generation), [Vertex AI](https://cloud.google.com/text-to-speech/docs/list-voices-and-types)"
)
pd_config.voice2 = st.text_input(

View file

@ -1,10 +1,9 @@
import os
import streamlit as st
from esperanto import AIFactory
from open_notebook.config import CONFIG
from open_notebook.domain.models import DefaultModels, Model, model_manager
from open_notebook.models import MODEL_CLASS_MAP
from pages.components.model_selector import model_selector
from pages.stream_app.utils import setup_page
@ -13,8 +12,6 @@ setup_page("🤖 Models", only_check_mandatory_models=False, stop_on_model_error
st.title("🤖 Models")
model_tab, model_defaults_tab = st.tabs(["Models", "Model Defaults"])
provider_status = {}
model_types = [
@ -25,252 +22,305 @@ model_types = [
"speech_to_text",
]
provider_status["ollama"] = os.environ.get("OLLAMA_API_BASE") is not None
provider_status["openai"] = os.environ.get("OPENAI_API_KEY") is not None
provider_status["groq"] = os.environ.get("GROQ_API_KEY") is not None
provider_status["xai"] = os.environ.get("XAI_API_KEY") is not None
provider_status["vertexai"] = (
os.environ.get("VERTEX_PROJECT") is not None
and os.environ.get("VERTEX_LOCATION") is not None
and os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
)
provider_status["vertexai-anthropic"] = (
os.environ.get("VERTEX_PROJECT") is not None
and os.environ.get("VERTEX_LOCATION") is not None
and os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
)
provider_status["gemini"] = os.environ.get("GOOGLE_API_KEY") is not None
provider_status["openrouter"] = (
os.environ.get("OPENROUTER_API_KEY") is not None
and os.environ.get("OPENAI_API_KEY") is not None
and os.environ.get("OPENROUTER_BASE_URL") is not None
)
provider_status["anthropic"] = os.environ.get("ANTHROPIC_API_KEY") is not None
provider_status["elevenlabs"] = os.environ.get("ELEVENLABS_API_KEY") is not None
provider_status["litellm"] = (
provider_status["ollama"]
or provider_status["vertexai"]
or provider_status["vertexai-anthropic"]
or provider_status["anthropic"]
or provider_status["openai"]
or provider_status["gemini"]
)
available_providers = [k for k, v in provider_status.items() if v]
unavailable_providers = [k for k, v in provider_status.items() if not v]
def check_available_providers():
    """
    Refresh ``provider_status`` from the environment and split providers by availability.

    A provider is marked available only when every environment variable it
    requires is present. The check is ``is not None``, so a variable that is
    set to an empty string still counts as present.

    Returns:
        tuple[list[str], list[str]]: ``(available_providers, unavailable_providers)``,
        each listing provider names in ``provider_status`` iteration order.
    """
    # Environment variables that must ALL be set for each provider.
    required_env_vars = {
        "ollama": ("OLLAMA_API_BASE",),
        "openai": ("OPENAI_API_KEY",),
        "groq": ("GROQ_API_KEY",),
        "xai": ("XAI_API_KEY",),
        "vertexai": (
            "VERTEX_PROJECT",
            "VERTEX_LOCATION",
            "GOOGLE_APPLICATION_CREDENTIALS",
        ),
        # NOTE(review): the env template also mentions GEMINI_API_KEY —
        # confirm which variable the provider actually reads.
        "gemini": ("GOOGLE_API_KEY",),
        "openrouter": (
            "OPENROUTER_API_KEY",
            "OPENAI_API_KEY",
            "OPENROUTER_BASE_URL",
        ),
        "anthropic": ("ANTHROPIC_API_KEY",),
        "elevenlabs": ("ELEVENLABS_API_KEY",),
        # Fixed: this previously read the misspelled "VORAGE_API_KEY", so the
        # voyage provider could never be reported as available.
        "voyage": ("VOYAGE_API_KEY",),
        "azure": (
            "AZURE_OPENAI_API_KEY",
            "AZURE_OPENAI_ENDPOINT",
            "AZURE_OPENAI_DEPLOYMENT_NAME",
            "AZURE_OPENAI_API_VERSION",
        ),
        "mistral": ("MISTRAL_API_KEY",),
        "deepseek": ("DEEPSEEK_API_KEY",),
    }
    for provider_name, env_names in required_env_vars.items():
        provider_status[provider_name] = all(
            os.environ.get(env_name) is not None for env_name in env_names
        )

    available_providers = [k for k, v in provider_status.items() if v]
    unavailable_providers = [k for k, v in provider_status.items() if not v]
    return available_providers, unavailable_providers


def generate_new_models(models, suggested_models):
    """
    List the suggested models that are not configured yet and whose provider is available.

    Args:
        models: Iterable of configured models exposing ``provider``, ``name``
            and ``type`` attributes.
        suggested_models: Mapping of provider -> {model type -> [model names]}.
            An empty value (including the ``[]`` fallback used when the config
            has no ``suggested_models`` entry) yields no recommendations.

    Returns:
        list[dict]: One ``{"name", "type", "provider"}`` dict per missing model,
        in the order the suggestions are listed.
    """
    # The caller falls back to an empty list when nothing is configured;
    # calling .items() on that would raise AttributeError.
    if not suggested_models:
        return []

    # Tuple keys avoid accidental collisions between hyphenated names.
    existing_model_keys = {(model.provider, model.name, model.type) for model in models}

    new_models = []
    for provider, types in suggested_models.items():
        # Only recommend models for providers that are currently usable.
        if not provider_status.get(provider):
            continue
        for type_, model_list in types.items():
            for model_name in model_list:
                if (provider, model_name, type_) not in existing_model_keys:
                    new_models.append(
                        {
                            "name": model_name,
                            "type": type_,
                            "provider": provider,
                        }
                    )
    return new_models
default_models = DefaultModels()
all_models = Model.get_all()
esperanto_available_providers = AIFactory.get_available_providers()
with model_tab:
st.subheader("Add Model")
provider = st.selectbox("Provider", available_providers)
if len(unavailable_providers) > 0:
st.caption(
f"Unavailable Providers: {', '.join(unavailable_providers)}. Please check docs page if you wish to enable them."
st.subheader("Provider Availability")
st.markdown(
"Below, you'll find all AI providers supported and their current availability status. To enable more providers, you need to setup some of their ENV Variables. Please check [the documentation](https://github.com/lfnovo/open-notebook/blob/main/docs/models.md) for instructions on how to do so."
)
available_providers, unavailable_providers = check_available_providers()
with st.expander("Available Providers"):
st.write(available_providers)
with st.expander("Unavailable Providers"):
st.write(unavailable_providers)
st.divider()
# Helper function to add model with auto-save
def add_model_form(model_type, container_key):
available_providers = esperanto_available_providers.get(model_type, [])
# Sort providers alphabetically for easier navigation
available_providers.sort()
# Remove perplexity from available_providers if it exists
if "perplexity" in available_providers:
available_providers.remove("perplexity")
if not available_providers:
st.info(f"No providers available for {model_type}")
return
st.markdown("**Add New Model**")
with st.form(key=f"add_{model_type}_{container_key}"):
provider = st.selectbox(
"Provider",
available_providers,
key=f"provider_{model_type}_{container_key}",
)
# Filter model types based on provider availability in MODEL_CLASS_MAP
available_model_types = []
for model_type in model_types:
if model_type in MODEL_CLASS_MAP and provider in MODEL_CLASS_MAP[model_type]:
available_model_types.append(model_type)
if not available_model_types:
st.error(f"No compatible model types available for provider: {provider}")
else:
model_type = st.selectbox(
"Model Type",
available_model_types,
help="Use language for text generation models, text_to_speech for TTS models for generating podcasts, etc.",
model_name = st.text_input(
"Model Name",
key=f"name_{model_type}_{container_key}",
help="gpt-4o-mini, claude, gemini, llama3, etc. For azure, use the deployment_name as the model_name",
)
if model_type == "text_to_speech" and provider == "gemini":
model_name = "gemini-default"
st.markdown("Gemini models are pre-configured. Using the default model.")
else:
model_name = st.text_input(
"Model Name", "", help="gpt-4o-mini, claude, gemini, llama3, etc"
)
if st.button("Save"):
model = Model(name=model_name, provider=provider, type=model_type)
model.save()
st.success("Saved")
st.divider()
suggested_models = CONFIG.get("suggested_models", [])
recommendations = generate_new_models(all_models, suggested_models)
if len(recommendations) > 0:
with st.expander("💁‍♂️ Recommended models to get you started.."):
for recommendation in recommendations:
st.markdown(
f"**{recommendation['name']}** ({recommendation['provider']}, {recommendation['type']})"
)
if st.button("Add", key=f"add_{recommendation['name']}"):
new_model = Model(**recommendation)
new_model.save()
st.rerun()
st.subheader("Configured Models")
model_types_available = {
# "vision": False,
"language": False,
"embedding": False,
"text_to_speech": False,
"speech_to_text": False,
}
for model in all_models:
model_types_available[model.type] = True
with st.container(border=True):
st.markdown(f"{model.name} ({model.provider}, {model.type})")
if st.button("Delete", key=f"delete_{model.id}"):
model.delete()
if st.form_submit_button("Add Model"):
if model_name:
model = Model(name=model_name, provider=provider, type=model_type)
model.save()
st.success("Model added!")
st.rerun()
for model_type, available in model_types_available.items():
if not available:
st.warning(f"No models available for {model_type}")
with model_defaults_tab:
text_generation_models = [model for model in all_models if model.type == "language"]
text_to_speech_models = [
model for model in all_models if model.type == "text_to_speech"
]
speech_to_text_models = [
model for model in all_models if model.type == "speech_to_text"
]
vision_models = [model for model in all_models if model.type == "vision"]
embedding_models = [model for model in all_models if model.type == "embedding"]
st.write(
"In this section, you can select the default models to be used on the various content operations done by Open Notebook. Some of these can be overriden in the different modules."
)
defs = {}
# Handle chat model selection
# Helper function to handle default model selection with auto-save
def handle_default_selection(
label, key, current_value, help_text, model_type, caption=None
):
selected_model = model_selector(
"Default Chat Model",
"default_chat_model",
selected_id=default_models.default_chat_model,
help="This model will be used for chat.",
model_type="language",
label,
key,
selected_id=current_value,
help=help_text,
model_type=model_type,
)
if selected_model:
default_models.default_chat_model = selected_model.id
st.divider()
# Handle transformation model selection
selected_model = model_selector(
"Default Transformation Model",
"default_transformation_model",
selected_id=default_models.default_transformation_model,
help="This model will be used for text transformations such as summaries, insights, etc.",
model_type="language",
)
if selected_model:
default_models.default_transformation_model = selected_model.id
st.caption("You can use a cheap model here like gpt-4o-mini, llama3, etc.")
st.divider()
# Handle tools model selection
selected_model = model_selector(
"Default Tools Model",
"default_tools_model",
selected_id=default_models.default_tools_model,
help="This model will be used for calling tools. Currently, it's best to use Open AI and Anthropic for this.",
model_type="language",
)
if selected_model:
default_models.default_tools_model = selected_model.id
st.caption("Recommended to use a capable model here, like gpt-4o, claude, etc.")
st.divider()
# Handle large context model selection
selected_model = model_selector(
"Large Context Model",
"large_context_model",
selected_id=default_models.large_context_model,
help="This model will be used for larger context generation -- recommended: Gemini",
model_type="language",
)
if selected_model:
default_models.large_context_model = selected_model.id
st.caption("Recommended to use Gemini models for larger context processing")
st.divider()
# Handle text-to-speech model selection
selected_model = model_selector(
"Default Text to Speech Model",
"default_text_to_speech_model",
selected_id=default_models.default_text_to_speech_model,
help="This is the default model for converting text to speech (podcasts, etc)",
model_type="text_to_speech",
)
st.caption("You can override this model on different podcasts")
if selected_model:
default_models.default_text_to_speech_model = selected_model.id
st.divider()
# Handle speech-to-text model selection
selected_model = model_selector(
"Default Speech to Text Model",
selected_id=default_models.default_speech_to_text_model,
help="This is the default model for converting speech to text (audio transcriptions, etc)",
model_type="speech_to_text",
key="default_speech_to_text_model",
)
if selected_model:
default_models.default_speech_to_text_model = selected_model.id
st.divider()
# Handle embedding model selection
selected_model = model_selector(
"Default Speech to Text Model",
"default_embedding_model",
selected_id=default_models.default_embedding_model,
help="This is the default model for embeddings (semantic search, etc)",
model_type="embedding",
)
if selected_model:
default_models.default_embedding_model = selected_model.id
st.warning(
"Caution: you cannot change the embedding model once there is embeddings or they will need to be regenerated"
)
for k, v in defs.items():
if v:
defs[k] = v.id
if st.button("Save Defaults"):
default_models.patch(defs)
# Auto-save when selection changes
if selected_model and (not current_value or selected_model.id != current_value):
setattr(default_models, key, selected_model.id)
default_models.update()
model_manager.refresh_defaults()
st.success("Saved")
st.toast(f"Default {model_type} model set to {selected_model.name}")
elif not selected_model and current_value:
setattr(default_models, key, None)
default_models.update()
model_manager.refresh_defaults()
st.toast(f"Default {model_type} model removed")
if caption:
st.caption(caption)
return selected_model
# Bucket the configured models by type; only the four supported types get a
# bucket, and models of any other type are left out.
models_by_type = {
    kind: [model for model in all_models if model.type == kind]
    for kind in ("language", "embedding", "text_to_speech", "speech_to_text")
}
st.markdown("""
**Model Management Guide:** For optimal performance, refer to [Which model to choose?](https://github.com/lfnovo/open-notebook/blob/main/docs/models.md)
You can test models in the [Transformations](Transformations) page.
""")
# Language Models Section
# Lists the configured language models (each with a delete button), shows the
# add-model form, and exposes the four language-model default selectors.
st.subheader("🗣️ Language Models")
with st.container(border=True):
# Relative column widths 2:1 — model list on the left, add form on the right
col1, col2 = st.columns([2, 1])
with col1:
st.markdown("**Configured Models**")
language_models = models_by_type["language"]
if language_models:
for model in language_models:
# One row per model: "provider/name" label next to its delete button
subcol1, subcol2 = st.columns([4, 1])
with subcol1:
st.markdown(f"{model.provider}/{model.name}")
with subcol2:
# Widget key includes the model id so every delete button is unique
if st.button(
"🗑️", key=f"delete_lang_{model.id}", help="Delete model"
):
model.delete()
# Rerun the script so the list is rebuilt without the deleted model
st.rerun()
else:
st.info("No language models configured")
with col2:
add_model_form("language", "main")
st.markdown("**Default Model Assignments**")
col1, col2 = st.columns(2)
with col1:
# Positional arguments: label, key, current_value, help_text, model_type, caption
handle_default_selection(
"Chat Model",
"default_chat_model",
default_models.default_chat_model,
"Used for chat conversations",
"language",
"Pick the one that vibes with you.",
)
handle_default_selection(
"Tools Model",
"default_tools_model",
default_models.default_tools_model,
"Used for calling tools - use OpenAI or Anthropic",
"language",
"Recommended: gpt-4o, claude, qwen3, etc.",
)
with col2:
handle_default_selection(
"Transformation Model",
"default_transformation_model",
default_models.default_transformation_model,
"Used for summaries, insights, etc.",
"language",
"Can use cheaper models: gpt-4o-mini, llama3, gemma3, etc.",
)
handle_default_selection(
"Large Context Model",
"large_context_model",
default_models.large_context_model,
"Used for large context processing",
"language",
"Recommended: Gemini models",
)
# Embedding Models Section
# Lists the configured embedding models (each with a delete button), the
# default-embedding selector, and the add-model form.
st.subheader("🔍 Embedding Models")
with st.container(border=True):
# Relative column widths 2:1 — model list on the left, add form on the right
col1, col2 = st.columns([2, 1])
with col1:
st.markdown("**Configured Models**")
embedding_models = models_by_type["embedding"]
if embedding_models:
for model in embedding_models:
# One row per model: "provider/name" label next to its delete button
subcol1, subcol2 = st.columns([4, 1])
with subcol1:
st.markdown(f"{model.provider}/{model.name}")
with subcol2:
# Widget key includes the model id so every delete button is unique
if st.button(
"🗑️", key=f"delete_emb_{model.id}", help="Delete model"
):
model.delete()
# Rerun the script so the list is rebuilt without the deleted model
st.rerun()
else:
st.info("No embedding models configured")
# Positional arguments: label, key, current_value, help_text, model_type (no caption)
handle_default_selection(
"Default Embedding Model",
"default_embedding_model",
default_models.default_embedding_model,
"Used for semantic search and embeddings",
"embedding",
)
st.warning("⚠️ Changing embedding models requires regenerating all embeddings")
with col2:
add_model_form("embedding", "main")
# Text-to-Speech Models Section
# Lists the configured text-to-speech models (each with a delete button), the
# default-TTS selector, and the add-model form.
st.subheader("🎙️ Text-to-Speech Models")
with st.container(border=True):
# Relative column widths 2:1 — model list on the left, add form on the right
col1, col2 = st.columns([2, 1])
with col1:
st.markdown("**Configured Models**")
tts_models = models_by_type["text_to_speech"]
if tts_models:
for model in tts_models:
# One row per model: "provider/name" label next to its delete button
subcol1, subcol2 = st.columns([4, 1])
with subcol1:
st.markdown(f"{model.provider}/{model.name}")
with subcol2:
# Widget key includes the model id so every delete button is unique
if st.button(
"🗑️", key=f"delete_tts_{model.id}", help="Delete model"
):
model.delete()
# Rerun the script so the list is rebuilt without the deleted model
st.rerun()
else:
st.info("No text-to-speech models configured")
# Positional arguments: label, key, current_value, help_text, model_type, caption
handle_default_selection(
"Default TTS Model",
"default_text_to_speech_model",
default_models.default_text_to_speech_model,
"Used for podcasts and audio generation",
"text_to_speech",
"Can be overridden per podcast",
)
with col2:
add_model_form("text_to_speech", "main")
# Speech-to-Text Models Section
# Lists the configured speech-to-text models (each with a delete button), the
# default-STT selector, and the add-model form.
st.subheader("🎤 Speech-to-Text Models")
with st.container(border=True):
# Relative column widths 2:1 — model list on the left, add form on the right
col1, col2 = st.columns([2, 1])
with col1:
st.markdown("**Configured Models**")
stt_models = models_by_type["speech_to_text"]
if stt_models:
for model in stt_models:
# One row per model: "provider/name" label next to its delete button
subcol1, subcol2 = st.columns([4, 1])
with subcol1:
st.markdown(f"{model.provider}/{model.name}")
with subcol2:
# Widget key includes the model id so every delete button is unique
if st.button(
"🗑️", key=f"delete_stt_{model.id}", help="Delete model"
):
model.delete()
# Rerun the script so the list is rebuilt without the deleted model
st.rerun()
else:
st.info("No speech-to-text models configured")
# Positional arguments: label, key, current_value, help_text, model_type (no caption)
handle_default_selection(
"Default STT Model",
"default_speech_to_text_model",
default_models.default_speech_to_text_model,
"Used for audio transcriptions",
"speech_to_text",
)
with col2:
add_model_form("speech_to_text", "main")

View file

@ -58,21 +58,26 @@ def source_panel(source_id: str, notebook_id=None, modal=False):
with c2:
transformations = Transformation.get_all(order_by="name asc")
with st.container(border=True):
transformation = st.selectbox(
"Run a transformation",
transformations,
key=f"transformation_{source.id}",
format_func=lambda x: x.name,
)
st.caption(transformation.description)
if st.button("Run"):
asyncio.run(
transform_graph.ainvoke(
input=dict(source=source, transformation=transformation)
)
if transformations:
with st.container(border=True):
transformation = st.selectbox(
"Run a transformation",
transformations,
key=f"transformation_{source.id}",
format_func=lambda x: x.name,
)
st.rerun(scope="fragment" if modal else "app")
st.caption(transformation.description if transformation else "")
if st.button("Run"):
asyncio.run(
transform_graph.ainvoke(
input=dict(source=source, transformation=transformation)
)
)
st.rerun(scope="fragment" if modal else "app")
else:
st.markdown(
"No transformations created yet. Create new Transformation to use this feature."
)
if not model_manager.embedding_model:
help = (

View file

@ -53,9 +53,9 @@ def setup_stream_state(current_notebook: Notebook) -> ChatSession:
If there is no existing thread state for this session_id, it creates a new one.
Finally, it acquires the existing state for the session from Langgraph state and sets it in the streamlit session state.
"""
assert (
current_notebook is not None and current_notebook.id
), "Current Notebook not selected properly"
assert current_notebook is not None and current_notebook.id, (
"Current Notebook not selected properly"
)
if "context_config" not in st.session_state[current_notebook.id]:
st.session_state[current_notebook.id]["context_config"] = {}
@ -99,14 +99,15 @@ def setup_stream_state(current_notebook: Notebook) -> ChatSession:
def check_migration():
if "migration_required" not in st.session_state:
st.session_state["migration_required"] = None
logger.critical("Running migration check")
logger.debug("Running migration check")
mm = MigrationManager()
if mm.needs_migration:
logger.critical("Migration required")
st.warning("The Open Notebook database needs a migration to run properly.")
if st.button("Run Migration"):
mm.run_migration_up()
st.success("Migration successful")
st.session_state["migration_required"] = False
st.rerun()
st.stop()
else:

View file

@ -1,6 +1,6 @@
[project]
name = "open-notebook"
version = "0.2.1"
version = "0.2.2"
description = "An open source implementation of a research assistant, inspired by Google Notebook LM"
authors = [
{name = "Luis Novo", email = "lfnovo@gmail.com"}
@ -13,7 +13,7 @@ classifiers = [
]
requires-python = ">=3.11,<3.13"
dependencies = [
"streamlit>=1.39.0",
"streamlit>=1.45.0",
"pydantic>=2.9.2",
"loguru>=0.7.2",
"langchain>=0.3.3",
@ -24,29 +24,29 @@ dependencies = [
"tiktoken>=0.8.0",
"streamlit-monaco>=0.1.3",
"langgraph-checkpoint-sqlite>=2.0.0",
"openai>=1.52.0",
"langchain-community>=0.3.3",
"langchain-openai>=0.2.3",
"langchain-anthropic>=0.2.3",
"langchain-ollama>=0.2.0",
"langchain-google-vertexai>=2.0.5",
"langchain-google-genai>=2.0.1",
"tomli>=2.0.2",
"google-generativeai>=0.8.3",
"langchain-groq>=0.2.1",
"langchain_mistralai>=0.2.1",
"langchain_deepseek>=0.1.3",
"tomli>=2.0.2",
"groq>=0.12.0",
"python-dotenv>=1.0.1",
"httpx[socks]>=0.27.0",
"sdblpy",
"podcastfy",
"nest-asyncio>=1.6.0",
"content-core>=1.0.0",
"content-core>=1.0.2",
"ai-prompter>=0.3",
"python-magic-bin==0.4.14; sys_platform == 'win32'"
"esperanto>=2.0.0",
"langchain-google-vertexai>=2.0.10",
]
[tool.setuptools]
package-dir = {"open_notebook" = "src/open_notebook"}
package-dir = {"open_notebook" = "open_notebook"}
[project.optional-dependencies]

View file

@ -1,3 +0,0 @@
from dotenv import load_dotenv
load_dotenv()

1021
uv.lock

File diff suppressed because it is too large Load diff