OpenAI compatible multimodal (#167)

* fix text

* remove lint from docker publish workflow

* gemini base url docs

* feat: add multimodal support for openai-compatible providers

- Add helper function to check OpenAI-compatible provider availability per mode
- Update provider detection to support language, embedding, STT, and TTS modalities
- Implement mode-specific environment variable detection (LLM, EMBEDDING, STT, TTS)
- Maintain backward compatibility with generic OPENAI_COMPATIBLE_BASE_URL
- Add comprehensive unit tests for all configuration scenarios
- Update .env.example with mode-specific environment variables
- Update provider support matrix in ai-models.md
- Create comprehensive openai-compatible.md setup guide

This enables users to configure different OpenAI-compatible endpoints for
different AI capabilities (e.g., LM Studio for language models, dedicated
server for embeddings) while maintaining full backward compatibility.

* upgrade

* chore: change docker release strategy
This commit is contained in:
Luis Novo 2025-10-19 07:44:05 -03:00 committed by GitHub
parent 8829eb40c5
commit 4c2b8257fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 943 additions and 28 deletions

View file

@ -14,6 +14,8 @@ surreal-data/
notebook_data/
temp/
*.env
.git/
.github/
# Frontend build artifacts and dependencies
frontend/node_modules/
@ -55,3 +57,7 @@ coverage.xml
.Trashes
ehthumbs.db
Thumbs.db
.quarentena/
surreal_single_data/

View file

@ -19,6 +19,7 @@ API_URL=http://localhost:5055
# GEMINI
# this is the best model for long context and podcast generation
# GOOGLE_API_KEY=
# GEMINI_API_BASE_URL= # Optional: Override default endpoint (for Vertex AI, proxies, etc.)
# VERTEXAI
# VERTEX_PROJECT=my-google-cloud-project-name
@ -57,10 +58,22 @@ API_URL=http://localhost:5055
# VOYAGE AI
# VOYAGE_API_KEY=
# OPEN AI COMPATIBLE ENDPOINTS
# OPENAI COMPATIBLE ENDPOINTS
# Generic configuration (applies to all modalities: language, embedding, STT, TTS)
# OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
# OPENAI_COMPATIBLE_API_KEY=
# Mode-specific configuration (overrides generic if set)
# Use these when you want different endpoints for different capabilities
# OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# OPENAI_COMPATIBLE_API_KEY_LLM=
# OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
# OPENAI_COMPATIBLE_API_KEY_EMBEDDING=
# OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
# OPENAI_COMPATIBLE_API_KEY_STT=
# OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
# OPENAI_COMPATIBLE_API_KEY_TTS=
# AZURE OPENAI
# AZURE_OPENAI_API_KEY=
# AZURE_OPENAI_ENDPOINT=

View file

@ -3,19 +3,10 @@ name: Build and Release
on:
workflow_dispatch:
inputs:
build_type:
description: 'Build type to create'
required: true
default: 'both'
type: choice
options:
- both
- regular
- single
push_latest:
description: 'Also push latest tags'
required: false
default: true
description: 'Also push v1-latest tags'
required: true
default: false
type: boolean
release:
types: [published]
@ -59,7 +50,6 @@ jobs:
build-regular:
needs: extract-version
runs-on: ubuntu-latest
if: github.event.inputs.build_type == 'regular' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -145,7 +135,6 @@ jobs:
build-single:
needs: extract-version
runs-on: ubuntu-latest
if: github.event.inputs.build_type == 'single' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -237,8 +226,7 @@ jobs:
run: |
echo "## Build Summary" >> $GITHUB_STEP_SUMMARY
echo "**Version:** ${{ needs.extract-version.outputs.version }}" >> $GITHUB_STEP_SUMMARY
echo "**Build Type:** ${{ github.event.inputs.build_type || 'both' }}" >> $GITHUB_STEP_SUMMARY
echo "**Push Latest:** ${{ github.event.inputs.push_latest || 'true' }}" >> $GITHUB_STEP_SUMMARY
echo "**Push v1-Latest:** ${{ github.event.inputs.push_latest || 'false' }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Registries:" >> $GITHUB_STEP_SUMMARY
echo "✅ **GHCR:** \`${{ env.GHCR_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY

View file

@ -51,6 +51,7 @@ docker-push: docker-buildx-prepare
@echo "🔨 Building regular image..."
docker buildx build --pull \
--platform $(PLATFORMS) \
--progress=plain \
-t $(DOCKERHUB_IMAGE):$(VERSION) \
-t $(GHCR_IMAGE):$(VERSION) \
--push \
@ -58,6 +59,7 @@ docker-push: docker-buildx-prepare
@echo "🔨 Building single-container image..."
docker buildx build --pull \
--platform $(PLATFORMS) \
--progress=plain \
-f Dockerfile.single \
-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
-t $(GHCR_IMAGE):$(VERSION)-single \
@ -77,6 +79,7 @@ docker-push-latest: docker-buildx-prepare
@echo "🔨 Building regular image with latest tag..."
docker buildx build --pull \
--platform $(PLATFORMS) \
--progress=plain \
-t $(DOCKERHUB_IMAGE):$(VERSION) \
-t $(DOCKERHUB_IMAGE):v1-latest \
-t $(GHCR_IMAGE):$(VERSION) \
@ -86,6 +89,7 @@ docker-push-latest: docker-buildx-prepare
@echo "🔨 Building single-container image with latest tag..."
docker buildx build --pull \
--platform $(PLATFORMS) \
--progress=plain \
-f Dockerfile.single \
-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
-t $(DOCKERHUB_IMAGE):v1-latest-single \

View file

@ -17,6 +17,21 @@ from open_notebook.exceptions import InvalidInputError
router = APIRouter()
def _check_openai_compatible_support(mode: str) -> bool:
    """
    Check if the OpenAI-compatible provider is configured for a specific mode.

    A mode counts as configured when either the generic
    ``OPENAI_COMPATIBLE_BASE_URL`` or the mode-specific
    ``OPENAI_COMPATIBLE_BASE_URL_<mode>`` environment variable holds a
    non-blank value.

    Args:
        mode: One of 'LLM', 'EMBEDDING', 'STT', 'TTS'

    Returns:
        bool: True if either the generic or the mode-specific env var is set
            to a non-empty, non-whitespace value
    """
    candidate_names = (
        "OPENAI_COMPATIBLE_BASE_URL",
        f"OPENAI_COMPATIBLE_BASE_URL_{mode}",
    )
    for name in candidate_names:
        value = os.environ.get(name)
        # A line such as `VAR=` in an env file exports an empty string rather
        # than leaving the variable unset; a blank URL is not a usable
        # endpoint, so it must not mark the provider as available.
        if value is not None and value.strip():
            return True
    return False
@router.get("/models", response_model=List[ModelResponse])
async def get_models(
type: Optional[str] = Query(None, description="Filter by model type")
@ -191,22 +206,43 @@ async def get_provider_availability():
),
"mistral": os.environ.get("MISTRAL_API_KEY") is not None,
"deepseek": os.environ.get("DEEPSEEK_API_KEY") is not None,
"openai-compatible": os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None,
"openai-compatible": (
_check_openai_compatible_support("LLM")
or _check_openai_compatible_support("EMBEDDING")
or _check_openai_compatible_support("STT")
or _check_openai_compatible_support("TTS")
),
}
available_providers = [k for k, v in provider_status.items() if v]
unavailable_providers = [k for k, v in provider_status.items() if not v]
# Get supported model types from Esperanto
esperanto_available = AIFactory.get_available_providers()
# Build supported types mapping only for available providers
supported_types: dict[str, list[str]] = {}
for provider in available_providers:
supported_types[provider] = []
for model_type, providers in esperanto_available.items():
if provider in providers:
supported_types[provider].append(model_type)
# Special handling for openai-compatible to check mode-specific availability
if provider == "openai-compatible":
# Map Esperanto model types to our environment variable modes
mode_mapping = {
"language": "LLM",
"embedding": "EMBEDDING",
"speech_to_text": "STT",
"text_to_speech": "TTS",
}
for model_type, mode in mode_mapping.items():
if model_type in esperanto_available and provider in esperanto_available[model_type]:
if _check_openai_compatible_support(mode):
supported_types[provider].append(model_type)
else:
# Standard provider detection
for model_type, providers in esperanto_available.items():
if provider in providers:
supported_types[provider].append(model_type)
return ProviderAvailabilityResponse(
available=available_providers,

View file

@ -72,7 +72,7 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
| **Azure OpenAI** | ✅ | ✅ | ❌ | ❌ |
| **OpenRouter** | ✅ | ❌ | ❌ | ❌ |
| **Perplexity** | ✅ | ❌ | ❌ | ❌ |
| **OpenAI Compatible** | ✅ | ❌ | ❌ | ❌ |
| **OpenAI Compatible** | ✅ | ✅ | ✅ | ✅ |
## Model Selection Guide
@ -103,6 +103,10 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
**Environment Setup**
```bash
export GEMINI_API_KEY=your_api_key_here
# Optional: Override the default Gemini API endpoint
# Use this for Vertex AI, custom proxies, or alternative endpoints
# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
```
**Recommended Models**
@ -321,22 +325,32 @@ export VOYAGE_API_KEY=your_api_key_here
---
### 🔧 OpenAI Compatible (LM Studio & Others)
**Best for**: Using any OpenAI-compatible API endpoint, including LM Studio
**Best for**: Using any OpenAI-compatible API endpoint for all AI modalities, including LM Studio
**Environment Setup**
```bash
# Generic configuration (applies to all modalities)
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
# Optional - only if your endpoint requires authentication
export OPENAI_COMPATIBLE_API_KEY=your_key_here
# Mode-specific configuration (for different endpoints per modality)
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
```
**Common Use Cases**
- **LM Studio**: Run models locally with a familiar UI
- **Text Generation WebUI**: Alternative local inference
- **vLLM**: High-performance inference server
- **Custom Endpoints**: Any OpenAI-compatible API
**Strengths**
- Use any OpenAI-compatible endpoint
- **NEW**: Full support for all 4 modalities (language, embeddings, STT, TTS)
- Configure different endpoints for different capabilities
- Perfect for LM Studio users
- Flexibility in model deployment
- Works with local and remote endpoints
@ -346,6 +360,8 @@ export OPENAI_COMPATIBLE_API_KEY=your_key_here
- Model availability varies by endpoint
- Some endpoints may not support all features
> **📖 Need detailed setup help?** Check our comprehensive [OpenAI-Compatible Setup Guide](openai-compatible.md) for LM Studio, Text Generation WebUI, vLLM, and other configurations.
## 🧠 Reasoning Models
Open Notebook fully supports **reasoning models** that show their transparent thinking process. These models output their internal reasoning within `<think>` tags, which Open Notebook automatically handles.
@ -490,6 +506,7 @@ Set up your API keys using environment variables. Here's the complete list:
export OPENAI_API_KEY=your_key
export ANTHROPIC_API_KEY=your_key
export GEMINI_API_KEY=your_key
export GEMINI_API_BASE_URL=https://custom-endpoint.com # Optional
# Additional Language Providers
export MISTRAL_API_KEY=your_key
@ -569,10 +586,14 @@ export ANTHROPIC_API_KEY=sk-ant-your-key-here
#### Google (Gemini)
```bash
export GEMINI_API_KEY=your-key-here
# Optional: Custom API endpoint (for Vertex AI, proxies, etc.)
# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
```
- Get your API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
- Excellent for large context and TTS
- Cost-effective option
- Supports custom endpoints via `GEMINI_API_BASE_URL` for advanced deployments
#### Ollama (Local)
```bash

View file

@ -0,0 +1,568 @@
# OpenAI-Compatible Providers Setup Guide
Open Notebook supports OpenAI-compatible API endpoints across all AI modalities (language models, embeddings, speech-to-text, and text-to-speech), giving you the flexibility to use popular tools like LM Studio, Text Generation WebUI, vLLM, and custom inference servers.
## Why Choose OpenAI-Compatible Providers?
- **🆓 Cost Flexibility**: Use free local inference or choose cost-effective cloud providers
- **🔒 Privacy Control**: Run models locally or choose privacy-focused hosted services
- **🎯 Model Selection**: Access to thousands of open-source models
- **⚡ Performance Tuning**: Optimize inference for your specific hardware
- **🔧 Full Control**: Deploy on your infrastructure with your configurations
- **🌐 Universal Standard**: Works with any service that implements the OpenAI API specification
## Quick Start
### Basic Setup (All Modalities)
**For LM Studio** (simplest):
```bash
# Start LM Studio and enable server mode on port 1234
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
# Most LM Studio endpoints don't require an API key
# export OPENAI_COMPATIBLE_API_KEY=not_needed
```
**For Text Generation WebUI**:
```bash
# Start with --api flag
# python server.py --api --listen
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
```
**For vLLM**:
```bash
# Start vLLM server
# vllm serve MODEL_NAME --port 8000
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
```
### Advanced Setup (Mode-Specific Endpoints)
Use different endpoints for different capabilities:
```bash
# Language models on LM Studio
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# Embeddings on a dedicated embedding server
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
# Speech services on a different server
export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
```
## Environment Variable Reference
### Generic Configuration
Use these when you want the same endpoint for all modalities:
| Variable | Purpose | Required |
|----------|---------|----------|
| `OPENAI_COMPATIBLE_BASE_URL` | Base URL for all AI services | Yes (unless using mode-specific) |
| `OPENAI_COMPATIBLE_API_KEY` | API key if endpoint requires auth | Optional |
**Example:**
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
export OPENAI_COMPATIBLE_API_KEY=your_key_here # If needed
```
### Mode-Specific Configuration
Use these when you want different endpoints for different capabilities:
| Variable | Purpose | Modality |
|----------|---------|----------|
| `OPENAI_COMPATIBLE_BASE_URL_LLM` | Language model endpoint | Language models |
| `OPENAI_COMPATIBLE_API_KEY_LLM` | API key for LLM endpoint | Language models |
| `OPENAI_COMPATIBLE_BASE_URL_EMBEDDING` | Embedding model endpoint | Embeddings |
| `OPENAI_COMPATIBLE_API_KEY_EMBEDDING` | API key for embedding endpoint | Embeddings |
| `OPENAI_COMPATIBLE_BASE_URL_STT` | Speech-to-text endpoint | Speech-to-Text |
| `OPENAI_COMPATIBLE_API_KEY_STT` | API key for STT endpoint | Speech-to-Text |
| `OPENAI_COMPATIBLE_BASE_URL_TTS` | Text-to-speech endpoint | Text-to-Speech |
| `OPENAI_COMPATIBLE_API_KEY_TTS` | API key for TTS endpoint | Text-to-Speech |
**Precedence**: Mode-specific variables override the generic `OPENAI_COMPATIBLE_BASE_URL`
**Example:**
```bash
# LLM on LM Studio
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# Embeddings on dedicated server
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=secret_key_here
```
## Common Use Cases
### LM Studio
**What is LM Studio?**
LM Studio is a desktop application for running large language models locally with a user-friendly interface.
**Setup Steps:**
1. **Download and install** LM Studio from [lmstudio.ai](https://lmstudio.ai/)
2. **Download a model** (e.g., Llama 3, Qwen, Mistral)
3. **Start the local server**:
- Go to the "Local Server" tab
- Click "Start Server"
- Note the port (default: 1234)
4. **Configure Open Notebook**:
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
```
**What works:**
- ✅ Language models (chat, completions)
- ✅ Embeddings (with embedding models)
- ❌ Speech-to-text (not supported)
- ❌ Text-to-speech (not supported)
**Tips:**
- LM Studio doesn't require an API key
- Choose quantized models (Q4, Q5) for better performance
- Monitor RAM usage - larger models need more memory
---
### Text Generation WebUI (Oobabooga)
**What is Text Generation WebUI?**
A powerful Gradio-based web interface for running Large Language Models.
**Setup Steps:**
1. **Install** following [official instructions](https://github.com/oobabooga/text-generation-webui)
2. **Download a model** using the UI or manually
3. **Start with API mode**:
```bash
python server.py --api --listen
```
4. **Configure Open Notebook**:
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
```
**What works:**
- ✅ Language models (excellent support)
- ✅ Embeddings (with compatible models)
- ❌ Speech services (not supported)
**Tips:**
- Use `--listen` to accept connections from Docker
- Supports more model formats than LM Studio
- Great for fine-tuned models
---
### vLLM
**What is vLLM?**
High-performance inference server optimized for serving large language models at scale.
**Setup Steps:**
1. **Install vLLM**:
```bash
pip install vllm
```
2. **Start the server**:
```bash
vllm serve meta-llama/Meta-Llama-3-8B-Instruct --port 8000
```
3. **Configure Open Notebook**:
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
```
**What works:**
- ✅ Language models (optimized inference)
- ✅ Embeddings (with embedding models)
- ❌ Speech services (not supported)
**Tips:**
- Best performance for production deployments
- Supports tensor parallelism for large models
- Excellent for high-throughput scenarios
---
### Custom OpenAI-Compatible Services
Many services implement the OpenAI API specification:
**Examples:**
- **Together AI**: Cloud-hosted models
- **Anyscale Endpoints**: Ray-based inference
- **Replicate**: Cloud model hosting
- **LocalAI**: Self-hosted alternative to OpenAI
- **FastChat**: Multi-model serving
**Configuration:**
```bash
# Generic setup
export OPENAI_COMPATIBLE_BASE_URL=https://api.your-service.com/v1
export OPENAI_COMPATIBLE_API_KEY=your_api_key_here
```
## Configuration Scenarios
### Scenario 1: Single Local Endpoint (Simplest)
**Use Case**: Running a single endpoint (e.g., LM Studio) for every modality it supports
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
```
**Result**:
- ✅ Language models available
- ✅ Embeddings available (if model supports)
- ✅ Speech services available (if endpoint supports)
- All use the same endpoint
---
### Scenario 2: Separate Endpoints per Modality
**Use Case**: Language models on LM Studio, embeddings on dedicated server
```bash
# Language models on LM Studio
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# Embeddings on specialized server
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key_here
```
**Result**:
- ✅ Language models use LM Studio (port 1234)
- ✅ Embeddings use specialized server (port 8080)
- ❌ Speech services not available (not configured)
---
### Scenario 3: Mixed Local and Cloud
**Use Case**: Local models for privacy, cloud for specialized tasks
```bash
# Local LLM (privacy-sensitive work)
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# Cloud embeddings (better quality)
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=https://api.cloud-provider.com/v1
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=cloud_key_here
# Cloud text-to-speech
export OPENAI_COMPATIBLE_BASE_URL_TTS=https://api.cloud-provider.com/v1
export OPENAI_COMPATIBLE_API_KEY_TTS=cloud_key_here
```
**Result**:
- ✅ Sensitive chat stays local
- ✅ High-quality embeddings from cloud
- ✅ Professional TTS from cloud
- 🔒 Privacy for conversations, cloud for non-sensitive features
---
### Scenario 4: Docker Deployment
**Use Case**: Open Notebook in Docker, LM Studio on host machine
**On macOS/Windows**:
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
```
**On Linux**:
```bash
# Use host networking or find host IP
export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
# or use --network host in docker run
```
**Important**:
- LM Studio must be set to listen on `0.0.0.0`, not just `localhost`
- In LM Studio settings, enable "Allow network connections"
## Network Configuration
### Docker Networking
**Problem**: Docker containers can't reach `localhost` on the host
**Solutions:**
**Option 1: Use `host.docker.internal` (Mac/Windows)**
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
```
**Option 2: Use host IP address (Linux)**
```bash
# Find host IP
ip addr show docker0 | grep inet
# Use in environment
export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
```
**Option 3: Host networking (Linux only)**
```bash
docker run --network host \
-v ./notebook_data:/app/data \
-e OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 \
lfnovo/open_notebook:v1-latest-single
```
### Remote Servers
**Use Case**: OpenAI-compatible service on a different machine
```bash
# Replace with your server's IP or hostname
export OPENAI_COMPATIBLE_BASE_URL=http://192.168.1.100:1234/v1
```
**Security Notes:**
- ⚠️ Only use on trusted networks
- Consider using HTTPS for production
- Implement API key authentication if possible
- Use firewall rules to restrict access
### Port Conflicts
**Problem**: Default port (1234) is already in use
**Solution**: Change the port in your inference server
**LM Studio:**
- Settings → Local Server → Port → Change to different port
**Then update environment:**
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8888/v1
```
## Troubleshooting
### Connection Refused
**Symptom**: "Connection refused" or "Could not connect to endpoint"
**Solutions:**
1. **Verify server is running**:
```bash
curl http://localhost:1234/v1/models
```
2. **Check firewall settings**: Ensure the port is not blocked
3. **For Docker**: Use `host.docker.internal` instead of `localhost`
4. **Check server binding**: Server must listen on `0.0.0.0`, not just `127.0.0.1`
---
### Models Not Found
**Symptom**: "Model not found" or "No models available"
**Solutions:**
1. **Verify model is loaded** in your inference server
2. **Check model name** matches what Open Notebook expects
3. **For LM Studio**: Ensure model is loaded in the local server tab
4. **Test endpoint**:
```bash
curl http://localhost:1234/v1/models
```
---
### Slow Performance
**Symptom**: Responses take a long time
**Solutions:**
1. **Use quantized models** (Q4, Q5 instead of full precision)
2. **Check RAM usage**: Model might be swapping to disk
3. **Reduce context length**: Smaller context = faster inference
4. **Enable GPU acceleration**: If available
5. **For vLLM**: Enable tensor parallelism for large models
---
### Authentication Errors
**Symptom**: "Unauthorized" or "Invalid API key"
**Solutions:**
1. **Set API key** if your endpoint requires it:
```bash
export OPENAI_COMPATIBLE_API_KEY=your_key_here
```
2. **Check key validity**: Test with curl:
```bash
curl -H "Authorization: Bearer YOUR_KEY" \
http://localhost:1234/v1/models
```
3. **For mode-specific**: Use the correct key variable:
```bash
export OPENAI_COMPATIBLE_API_KEY_LLM=llm_key
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key
```
---
### Docker Can't Reach Host
**Symptom**: Connection works locally but not from Docker
**Solutions:**
1. **Use `host.docker.internal`** (Mac/Windows):
```bash
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
```
2. **On Linux**: Use host IP or `--network host`
3. **Check server listening**: Must listen on `0.0.0.0:1234`, not `127.0.0.1:1234`
4. **Test from inside container**:
```bash
docker exec -it open-notebook curl http://host.docker.internal:1234/v1/models
```
---
### Embeddings Not Working
**Symptom**: Search or embeddings fail
**Solutions:**
1. **Verify embedding model is loaded**: Many inference servers need explicit embedding model setup
2. **Use dedicated embedding endpoint**: If available
3. **Check model compatibility**: Not all models support embeddings
4. **For LM Studio**: Load an embedding model separately
---
### Mixed Results (Some Modes Work, Others Don't)
**Symptom**: Language models work, but embeddings or speech don't
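**Solution**: Use mode-specific configuration instead of the generic `OPENAI_COMPATIBLE_BASE_URL`. The generic variable enables every modality, so unset it and configure only the modes your endpoint actually serves: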
```bash
# What works
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# For embeddings, use a different provider
export OPENAI_API_KEY=your_openai_key # Fallback to OpenAI for embeddings
```
## Best Practices
### Security
1. **API Keys**:
- Use environment variables, never hardcode
- Rotate keys regularly for cloud services
- Use different keys for different services
2. **Network**:
- Only expose on trusted networks
- Use HTTPS in production
- Implement firewall rules
3. **Data Privacy**:
- Use local models for sensitive data
- Check service privacy policies
- Understand data retention policies
### Performance
1. **Model Selection**:
- Quantized models (Q4, Q5) for better speed/memory trade-off
- Smaller models for simple tasks
- Larger models only when needed
2. **Resource Management**:
- Monitor RAM and GPU usage
- Use appropriate batch sizes
- Consider model caching strategies
3. **Network**:
- Use local endpoints when possible for lower latency
- For cloud: Choose geographically close servers
### Reliability
1. **Fallback Strategy**:
```bash
# Primary: Local LLM
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
# Fallback: Use OpenAI if local is unavailable
export OPENAI_API_KEY=your_backup_key
```
2. **Health Checks**:
- Periodically test endpoints
- Monitor server status
- Set up alerts for downtime
3. **Testing**:
- Test configuration before production
- Validate all required modalities work
- Check error handling
## Getting Help
**Community Resources:**
- [Open Notebook Discord](https://discord.gg/37XJPXfz2w) - Get help with Open Notebook integration
- [LM Studio Discord](https://discord.gg/lmstudio) - LM Studio-specific support
- [Text Generation WebUI GitHub](https://github.com/oobabooga/text-generation-webui) - Issues and discussions
**Debugging Steps:**
1. **Test endpoint directly** with curl before configuring Open Notebook
2. **Check Open Notebook logs** for detailed error messages
3. **Verify environment variables** are set correctly
4. **Test with simple requests** first (list models, simple completion)
**Common curl tests:**
```bash
# List models
curl http://localhost:1234/v1/models
# Test completion
curl http://localhost:1234/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "your-model",
"messages": [{"role": "user", "content": "Hello!"}]
}'
# Test embeddings
curl http://localhost:8080/v1/embeddings \
-H "Content-Type: application/json" \
-d '{
"model": "embedding-model",
"input": "Test text"
}'
```
This guide should help you successfully configure OpenAI-compatible providers with Open Notebook. For general AI model configuration, see the [AI Models Guide](ai-models.md).

279
tests/test_models_api.py Normal file
View file

@ -0,0 +1,279 @@
from unittest.mock import patch
import pytest
from fastapi.testclient import TestClient
from api.main import app
client = TestClient(app)
class TestModelsProviderAvailability:
"""Test suite for Models Provider Availability endpoint."""
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_generic_env_var_enables_all_modes(self, mock_esperanto, mock_env):
    """Test that OPENAI_COMPATIBLE_BASE_URL enables all 4 modes."""
    # Simulated environment: only the generic variable is set.
    fake_env = {"OPENAI_COMPATIBLE_BASE_URL": "http://localhost:1234/v1"}

    # The patch replaces ``get`` on the process-wide ``os.environ`` object, so
    # the stub accepts the optional ``default`` like the real method; a
    # one-argument stub raises TypeError for ``os.environ.get(name, default)``.
    def env_side_effect(key, default=None):
        return fake_env.get(key, default)

    mock_env.side_effect = env_side_effect

    # Esperanto reports openai-compatible for every model type.
    mock_esperanto.return_value = {
        "language": ["openai-compatible"],
        "embedding": ["openai-compatible"],
        "speech_to_text": ["openai-compatible"],
        "text_to_speech": ["openai-compatible"],
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200
    data = response.json()

    # The provider must be listed as available.
    assert "openai-compatible" in data["available"]

    # All four model types must be reported, and nothing else.
    assert "openai-compatible" in data["supported_types"]
    supported = data["supported_types"]["openai-compatible"]
    for model_type in ("language", "embedding", "speech_to_text", "text_to_speech"):
        assert model_type in supported
    assert len(supported) == 4
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_mode_specific_env_vars_llm_embedding(self, mock_esperanto, mock_env):
    """Test mode-specific env vars (LLM + EMBEDDING) enable only those 2 modes."""
    # Simulated environment: only the LLM and EMBEDDING variables are set.
    fake_env = {
        "OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:1234/v1",
        "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING": "http://localhost:8080/v1",
    }

    # The patch replaces ``get`` on the process-wide ``os.environ`` object, so
    # the stub accepts the optional ``default`` like the real method; a
    # one-argument stub raises TypeError for ``os.environ.get(name, default)``.
    def env_side_effect(key, default=None):
        return fake_env.get(key, default)

    mock_env.side_effect = env_side_effect

    # Esperanto reports openai-compatible for every model type.
    mock_esperanto.return_value = {
        "language": ["openai-compatible"],
        "embedding": ["openai-compatible"],
        "speech_to_text": ["openai-compatible"],
        "text_to_speech": ["openai-compatible"],
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200
    data = response.json()

    # The provider must be listed as available.
    assert "openai-compatible" in data["available"]

    # Only the two configured model types may be reported.
    assert "openai-compatible" in data["supported_types"]
    supported = data["supported_types"]["openai-compatible"]
    for model_type in ("language", "embedding"):
        assert model_type in supported
    for model_type in ("speech_to_text", "text_to_speech"):
        assert model_type not in supported
    assert len(supported) == 2
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_no_env_vars_set(self, mock_esperanto, mock_env):
    """Test that openai-compatible is not available when no env vars are set."""
    # Simulated environment: nothing is set. The patch replaces ``get`` on the
    # process-wide ``os.environ`` object, so the stub accepts the optional
    # ``default`` like the real method and hands it back for every lookup.
    def env_side_effect(key, default=None):
        return default

    mock_env.side_effect = env_side_effect

    # Esperanto still knows about the provider for two model types.
    mock_esperanto.return_value = {
        "language": ["openai-compatible"],
        "embedding": ["openai-compatible"],
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200
    data = response.json()

    # Without any base URL the provider must be reported as unavailable.
    assert "openai-compatible" not in data["available"]
    assert "openai-compatible" in data["unavailable"]

    # Unavailable providers get no supported_types entry.
    assert "openai-compatible" not in data["supported_types"]
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_mixed_config_generic_and_mode_specific(self, mock_esperanto, mock_env):
    """Test mixed config: generic + mode-specific (generic should enable all)."""
    # Simulated environment: the generic variable plus an LLM-specific one.
    fake_env = {
        "OPENAI_COMPATIBLE_BASE_URL": "http://localhost:1234/v1",
        "OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:5678/v1",
    }

    # The patch replaces ``get`` on the process-wide ``os.environ`` object, so
    # the stub accepts the optional ``default`` like the real method; a
    # one-argument stub raises TypeError for ``os.environ.get(name, default)``.
    def env_side_effect(key, default=None):
        return fake_env.get(key, default)

    mock_env.side_effect = env_side_effect

    # Esperanto reports openai-compatible for every model type.
    mock_esperanto.return_value = {
        "language": ["openai-compatible"],
        "embedding": ["openai-compatible"],
        "speech_to_text": ["openai-compatible"],
        "text_to_speech": ["openai-compatible"],
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200
    data = response.json()

    # The provider must be listed as available.
    assert "openai-compatible" in data["available"]

    # The generic variable alone is enough to enable all four model types.
    assert "openai-compatible" in data["supported_types"]
    supported = data["supported_types"]["openai-compatible"]
    for model_type in ("language", "embedding", "speech_to_text", "text_to_speech"):
        assert model_type in supported
    assert len(supported) == 4
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_individual_mode_llm_only(self, mock_esperanto, mock_env):
    """Test individual mode-specific var (LLM only)."""
    # Simulated environment: only the LLM-specific variable is set.
    fake_env = {"OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:1234/v1"}

    # The patch replaces ``get`` on the process-wide ``os.environ`` object, so
    # the stub accepts the optional ``default`` like the real method; a
    # one-argument stub raises TypeError for ``os.environ.get(name, default)``.
    def env_side_effect(key, default=None):
        return fake_env.get(key, default)

    mock_env.side_effect = env_side_effect

    # Esperanto reports openai-compatible for every model type.
    mock_esperanto.return_value = {
        "language": ["openai-compatible"],
        "embedding": ["openai-compatible"],
        "speech_to_text": ["openai-compatible"],
        "text_to_speech": ["openai-compatible"],
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200
    data = response.json()

    # Only the language model type may be reported.
    supported = data["supported_types"]["openai-compatible"]
    assert supported == ["language"]
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_individual_mode_embedding_only(self, mock_esperanto, mock_env):
    """Only the EMBEDDING-specific base URL is set, so only ``embedding`` is supported.

    Args:
        mock_esperanto: Mock of ``AIFactory.get_available_providers``
            (innermost patch, therefore the first injected argument).
        mock_env: Mock of ``os.environ.get`` (outermost patch).
    """
    # Simulated environment: nothing but the embedding-specific variable.
    fake_env = {"OPENAI_COMPATIBLE_BASE_URL_EMBEDDING": "http://localhost:8080/v1"}
    # Mirror the real ``get(key, default=None)`` signature so a lookup that
    # passes a fallback value gets that fallback instead of a TypeError.
    mock_env.side_effect = lambda key, default=None: fake_env.get(key, default)

    # Esperanto claims support for every modality; the endpoint is expected
    # to narrow this down based on the environment.
    mock_esperanto.return_value = {
        modality: ["openai-compatible"]
        for modality in ("language", "embedding", "speech_to_text", "text_to_speech")
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200

    # Only the embedding modality may be reported.
    supported = response.json()["supported_types"]["openai-compatible"]
    assert supported == ["embedding"]
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_individual_mode_stt_only(self, mock_esperanto, mock_env):
    """Only the STT-specific base URL is set, so only ``speech_to_text`` is supported.

    Args:
        mock_esperanto: Mock of ``AIFactory.get_available_providers``
            (innermost patch, therefore the first injected argument).
        mock_env: Mock of ``os.environ.get`` (outermost patch).
    """
    # Simulated environment: nothing but the speech-to-text variable.
    fake_env = {"OPENAI_COMPATIBLE_BASE_URL_STT": "http://localhost:9000/v1"}
    # Mirror the real ``get(key, default=None)`` signature so a lookup that
    # passes a fallback value gets that fallback instead of a TypeError.
    mock_env.side_effect = lambda key, default=None: fake_env.get(key, default)

    # Esperanto claims support for every modality; the endpoint is expected
    # to narrow this down based on the environment.
    mock_esperanto.return_value = {
        modality: ["openai-compatible"]
        for modality in ("language", "embedding", "speech_to_text", "text_to_speech")
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200

    # Only the speech-to-text modality may be reported.
    supported = response.json()["supported_types"]["openai-compatible"]
    assert supported == ["speech_to_text"]
@patch("api.routers.models.os.environ.get")
@patch("api.routers.models.AIFactory.get_available_providers")
def test_individual_mode_tts_only(self, mock_esperanto, mock_env):
    """Only the TTS-specific base URL is set, so only ``text_to_speech`` is supported.

    Args:
        mock_esperanto: Mock of ``AIFactory.get_available_providers``
            (innermost patch, therefore the first injected argument).
        mock_env: Mock of ``os.environ.get`` (outermost patch).
    """
    # Simulated environment: nothing but the text-to-speech variable.
    fake_env = {"OPENAI_COMPATIBLE_BASE_URL_TTS": "http://localhost:9000/v1"}
    # Mirror the real ``get(key, default=None)`` signature so a lookup that
    # passes a fallback value gets that fallback instead of a TypeError.
    mock_env.side_effect = lambda key, default=None: fake_env.get(key, default)

    # Esperanto claims support for every modality; the endpoint is expected
    # to narrow this down based on the environment.
    mock_esperanto.return_value = {
        modality: ["openai-compatible"]
        for modality in ("language", "embedding", "speech_to_text", "text_to_speech")
    }

    response = client.get("/api/models/providers")
    assert response.status_code == 200

    # Only the text-to-speech modality may be reported.
    supported = response.json()["supported_types"]["openai-compatible"]
    assert supported == ["text_to_speech"]

View file

@ -620,15 +620,15 @@ wheels = [
[[package]]
name = "esperanto"
version = "2.6.0"
version = "2.7.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "httpx" },
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ec/a6/088c04b8df5c89d09620869647271ef480a855734d7b17f78fcdb7f183d2/esperanto-2.6.0.tar.gz", hash = "sha256:49ae83650812ddf32e8a5b54229b5bb8393b8a0b866c77ae7e264e2adc9231a7", size = 535743, upload-time = "2025-09-26T21:51:52.844Z" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/cf/0da02a603a63b3850abd14d23629f101942db5c18840b0cc6f34d7db9a04/esperanto-2.7.0.tar.gz", hash = "sha256:3861e4e20697813b19f0070a1142934bd6792077c3c174a2c3dd4b6ca0676b06", size = 553433, upload-time = "2025-10-19T02:04:30.21Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/d6/32b84cbeac1234f2e77c920b6d07e574ee6bfa3675797bb8bd76f36e7e0f/esperanto-2.6.0-py3-none-any.whl", hash = "sha256:063108274966e8e9bc19b844740ddf7646dd4bc5f6b1b7c586cac37947ffeab0", size = 129234, upload-time = "2025-09-26T21:51:51.159Z" },
{ url = "https://files.pythonhosted.org/packages/14/9c/79827f246965ed66ae8d2f3e3937e552730eaf48b270dac852a4756c7bf4/esperanto-2.7.0-py3-none-any.whl", hash = "sha256:2ea3fa98d8622d08a18dc6701ad362461de02492a3252326c70c969b3aba3db6", size = 129524, upload-time = "2025-10-19T02:04:28.57Z" },
]
[[package]]