From 4c2b8257fcf5f7cfb40eed7e14a66cc60e999ddd Mon Sep 17 00:00:00 2001
From: Luis Novo <lfnovo@gmail.com>
Date: Sun, 19 Oct 2025 07:44:05 -0300
Subject: [PATCH] OpenAI compatible multimodal (#167)

* fix text

* remove lint from docker publish workflow

* gemini base url docs

* feat: add multimodal support for openai-compatible providers

- Add helper function to check OpenAI-compatible provider availability per mode
- Update provider detection to support language, embedding, STT, and TTS modalities
- Implement mode-specific environment variable detection (LLM, EMBEDDING, STT, TTS)
- Maintain backward compatibility with generic OPENAI_COMPATIBLE_BASE_URL
- Add comprehensive unit tests for all configuration scenarios
- Update .env.example with mode-specific environment variables
- Update provider support matrix in ai-models.md
- Create comprehensive openai-compatible.md setup guide

This enables users to configure different OpenAI-compatible endpoints for
different AI capabilities (e.g., LM Studio for language models, dedicated
server for embeddings) while maintaining full backward compatibility.

* upgrade

* chore: change docker release strategy
---
 .dockerignore                           |   6 +
 .env.example                            |  15 +-
 .github/workflows/build-and-release.yml |  20 +-
 Makefile                                |   4 +
 api/routers/models.py                   |  48 +-
 docs/features/ai-models.md              |  25 +-
 docs/features/openai-compatible.md      | 568 ++++++++++++++++++++++++
 tests/test_models_api.py                | 279 ++++++++++++
 uv.lock                                 |   6 +-
 9 files changed, 943 insertions(+), 28 deletions(-)
 create mode 100644 docs/features/openai-compatible.md
 create mode 100644 tests/test_models_api.py

diff --git a/.dockerignore b/.dockerignore
index ef72e9d..cc482b3 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -14,6 +14,8 @@ surreal-data/
 notebook_data/
 temp/
 *.env
+.git/
+.github/
 
 # Frontend build artifacts and dependencies
 frontend/node_modules/
@@ -55,3 +57,7 @@ coverage.xml
 .Trashes
 ehthumbs.db
 Thumbs.db
+
+
+.quarentena/
+surreal_single_data/
\ No newline at end of file
diff --git a/.env.example b/.env.example
index 276740a..e2300a0 100644
--- a/.env.example
+++ b/.env.example
@@ -19,6 +19,7 @@ API_URL=http://localhost:5055
 # GEMINI
 # this is the best model for long context and podcast generation
 # GOOGLE_API_KEY=
+# GEMINI_API_BASE_URL=  # Optional: Override default endpoint (for Vertex AI, proxies, etc.)
 
 # VERTEXAI
 # VERTEX_PROJECT=my-google-cloud-project-name
@@ -57,10 +58,22 @@ API_URL=http://localhost:5055
 # VOYAGE AI
 # VOYAGE_API_KEY=
 
-# OPEN AI COMPATIBLE ENDPOINTS
+# OPENAI COMPATIBLE ENDPOINTS
+# Generic configuration (applies to all modalities: language, embedding, STT, TTS)
 # OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
 # OPENAI_COMPATIBLE_API_KEY=
 
+# Mode-specific configuration (overrides generic if set)
+# Use these when you want different endpoints for different capabilities
+# OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+# OPENAI_COMPATIBLE_API_KEY_LLM=
+# OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
+# OPENAI_COMPATIBLE_API_KEY_EMBEDDING=
+# OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
+# OPENAI_COMPATIBLE_API_KEY_STT=
+# OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
+# OPENAI_COMPATIBLE_API_KEY_TTS=
+
 # AZURE OPENAI
 # AZURE_OPENAI_API_KEY=
 # AZURE_OPENAI_ENDPOINT=
diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml
index 79e50f3..dd8694d 100644
--- a/.github/workflows/build-and-release.yml
+++ b/.github/workflows/build-and-release.yml
@@ -3,19 +3,10 @@ name: Build and Release
 on:
   workflow_dispatch:
     inputs:
-      build_type:
-        description: 'Build type to create'
-        required: true
-        default: 'both'
-        type: choice
-        options:
-          - both
-          - regular
-          - single
       push_latest:
-        description: 'Also push latest tags'
-        required: false
-        default: true
+        description: 'Also push v1-latest tags'
+        required: true
+        default: false
         type: boolean
   release:
     types: [published]
@@ -59,7 +50,6 @@ jobs:
   build-regular:
     needs: extract-version
     runs-on: ubuntu-latest
-    if: github.event.inputs.build_type == 'regular' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -145,7 +135,6 @@ jobs:
   build-single:
     needs: extract-version
     runs-on: ubuntu-latest
-    if: github.event.inputs.build_type == 'single' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -237,8 +226,7 @@ jobs:
         run: |
           echo "## Build Summary" >> $GITHUB_STEP_SUMMARY
           echo "**Version:** ${{ needs.extract-version.outputs.version }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Build Type:** ${{ github.event.inputs.build_type || 'both' }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Push Latest:** ${{ github.event.inputs.push_latest || 'true' }}" >> $GITHUB_STEP_SUMMARY
+          echo "**Push v1-Latest:** ${{ github.event.inputs.push_latest || 'false' }}" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "### Registries:" >> $GITHUB_STEP_SUMMARY
           echo "✅ **GHCR:** \`${{ env.GHCR_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY
diff --git a/Makefile b/Makefile
index 0aa631a..7fabe78 100644
--- a/Makefile
+++ b/Makefile
@@ -51,6 +51,7 @@ docker-push: docker-buildx-prepare
 	@echo "🔨 Building regular image..."
 	docker buildx build --pull \
 		--platform $(PLATFORMS) \
+		--progress=plain \
 		-t $(DOCKERHUB_IMAGE):$(VERSION) \
 		-t $(GHCR_IMAGE):$(VERSION) \
 		--push \
@@ -58,6 +59,7 @@ docker-push: docker-buildx-prepare
 	@echo "🔨 Building single-container image..."
 	docker buildx build --pull \
 		--platform $(PLATFORMS) \
+		--progress=plain \
 		-f Dockerfile.single \
 		-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
 		-t $(GHCR_IMAGE):$(VERSION)-single \
@@ -77,6 +79,7 @@ docker-push-latest: docker-buildx-prepare
 	@echo "🔨 Building regular image with latest tag..."
 	docker buildx build --pull \
 		--platform $(PLATFORMS) \
+		--progress=plain \
 		-t $(DOCKERHUB_IMAGE):$(VERSION) \
 		-t $(DOCKERHUB_IMAGE):v1-latest \
 		-t $(GHCR_IMAGE):$(VERSION) \
@@ -86,6 +89,7 @@ docker-push-latest: docker-buildx-prepare
 	@echo "🔨 Building single-container image with latest tag..."
 	docker buildx build --pull \
 		--platform $(PLATFORMS) \
+		--progress=plain \
 		-f Dockerfile.single \
 		-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
 		-t $(DOCKERHUB_IMAGE):v1-latest-single \
diff --git a/api/routers/models.py b/api/routers/models.py
index 9a5f725..ff2eb28 100644
--- a/api/routers/models.py
+++ b/api/routers/models.py
@@ -17,6 +17,21 @@ from open_notebook.exceptions import InvalidInputError
 router = APIRouter()
 
 
+def _check_openai_compatible_support(mode: str) -> bool:
+    """
+    Check if OpenAI-compatible provider is available for a specific mode.
+
+    Args:
+        mode: One of 'LLM', 'EMBEDDING', 'STT', 'TTS'
+
+    Returns:
+        bool: True if the generic or mode-specific base URL is set and non-empty
+    """
+    # A blank value (e.g. `VAR=` left in a .env file) is not a usable endpoint.
+    names = ("OPENAI_COMPATIBLE_BASE_URL", f"OPENAI_COMPATIBLE_BASE_URL_{mode}")
+    return any(os.environ.get(name) for name in names)
+
+
 @router.get("/models", response_model=List[ModelResponse])
 async def get_models(
     type: Optional[str] = Query(None, description="Filter by model type")
@@ -191,22 +206,43 @@ async def get_provider_availability():
             ),
             "mistral": os.environ.get("MISTRAL_API_KEY") is not None,
             "deepseek": os.environ.get("DEEPSEEK_API_KEY") is not None,
-            "openai-compatible": os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None,
+            "openai-compatible": (
+                _check_openai_compatible_support("LLM")
+                or _check_openai_compatible_support("EMBEDDING")
+                or _check_openai_compatible_support("STT")
+                or _check_openai_compatible_support("TTS")
+            ),
         }
         
         available_providers = [k for k, v in provider_status.items() if v]
         unavailable_providers = [k for k, v in provider_status.items() if not v]
-        
+
         # Get supported model types from Esperanto
         esperanto_available = AIFactory.get_available_providers()
-        
+
         # Build supported types mapping only for available providers
         supported_types: dict[str, list[str]] = {}
         for provider in available_providers:
             supported_types[provider] = []
-            for model_type, providers in esperanto_available.items():
-                if provider in providers:
-                    supported_types[provider].append(model_type)
+
+            # Special handling for openai-compatible to check mode-specific availability
+            if provider == "openai-compatible":
+                # Map Esperanto model types to our environment variable modes
+                mode_mapping = {
+                    "language": "LLM",
+                    "embedding": "EMBEDDING",
+                    "speech_to_text": "STT",
+                    "text_to_speech": "TTS",
+                }
+                for model_type, mode in mode_mapping.items():
+                    if model_type in esperanto_available and provider in esperanto_available[model_type]:
+                        if _check_openai_compatible_support(mode):
+                            supported_types[provider].append(model_type)
+            else:
+                # Standard provider detection
+                for model_type, providers in esperanto_available.items():
+                    if provider in providers:
+                        supported_types[provider].append(model_type)
         
         return ProviderAvailabilityResponse(
             available=available_providers,
diff --git a/docs/features/ai-models.md b/docs/features/ai-models.md
index 2d60e1a..b0ed875 100644
--- a/docs/features/ai-models.md
+++ b/docs/features/ai-models.md
@@ -72,7 +72,7 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
 | **Azure OpenAI** | ✅       | ✅        | ❌  | ❌  |
 | **OpenRouter**   | ✅       | ❌        | ❌  | ❌  |
 | **Perplexity**   | ✅       | ❌        | ❌  | ❌  |
-| **OpenAI Compatible** | ✅       | ❌        | ❌  | ❌  |
+| **OpenAI Compatible** | ✅       | ✅        | ✅  | ✅  |
 
 ## Model Selection Guide
 
@@ -103,6 +103,10 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
 **Environment Setup**
 ```bash
 export GEMINI_API_KEY=your_api_key_here
+
+# Optional: Override the default Gemini API endpoint
+# Use this for Vertex AI, custom proxies, or alternative endpoints
+# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
 ```
 
 **Recommended Models**
@@ -321,22 +325,32 @@ export VOYAGE_API_KEY=your_api_key_here
 ---
 
 ### 🔧 OpenAI Compatible (LM Studio & Others)
-**Best for**: Using any OpenAI-compatible API endpoint, including LM Studio
+**Best for**: Using any OpenAI-compatible API endpoint for all AI modalities, including LM Studio
 
 **Environment Setup**
 ```bash
+# Generic configuration (applies to all modalities)
 export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
 # Optional - only if your endpoint requires authentication
 export OPENAI_COMPATIBLE_API_KEY=your_key_here
+
+# Mode-specific configuration (for different endpoints per modality)
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
+export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
+export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
 ```
 
 **Common Use Cases**
 - **LM Studio**: Run models locally with a familiar UI
 - **Text Generation WebUI**: Alternative local inference
+- **vLLM**: High-performance inference server
 - **Custom Endpoints**: Any OpenAI-compatible API
 
 **Strengths**
 - Use any OpenAI-compatible endpoint
+- **NEW**: Full support for all 4 modalities (language, embeddings, STT, TTS)
+- Configure different endpoints for different capabilities
 - Perfect for LM Studio users
 - Flexibility in model deployment
 - Works with local and remote endpoints
@@ -346,6 +360,8 @@ export OPENAI_COMPATIBLE_API_KEY=your_key_here
 - Model availability varies by endpoint
 - Some endpoints may not support all features
 
+> **📖 Need detailed setup help?** Check our comprehensive [OpenAI-Compatible Setup Guide](openai-compatible.md) for LM Studio, Text Generation WebUI, vLLM, and other configurations.
+
 ## 🧠 Reasoning Models
 
 Open Notebook fully supports **reasoning models** that show their transparent thinking process. These models output their internal reasoning within `<think>` tags, which Open Notebook automatically handles.
@@ -490,6 +506,7 @@ Set up your API keys using environment variables. Here's the complete list:
 export OPENAI_API_KEY=your_key
 export ANTHROPIC_API_KEY=your_key
 export GEMINI_API_KEY=your_key
+export GEMINI_API_BASE_URL=https://custom-endpoint.com  # Optional
 
 # Additional Language Providers
 export MISTRAL_API_KEY=your_key
@@ -569,10 +586,14 @@ export ANTHROPIC_API_KEY=sk-ant-your-key-here
 #### Google (Gemini)
 ```bash
 export GEMINI_API_KEY=your-key-here
+
+# Optional: Custom API endpoint (for Vertex AI, proxies, etc.)
+# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
 ```
 - Get your API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
 - Excellent for large context and TTS
 - Cost-effective option
+- Supports custom endpoints via `GEMINI_API_BASE_URL` for advanced deployments
 
 #### Ollama (Local)
 ```bash
diff --git a/docs/features/openai-compatible.md b/docs/features/openai-compatible.md
new file mode 100644
index 0000000..9bcd974
--- /dev/null
+++ b/docs/features/openai-compatible.md
@@ -0,0 +1,568 @@
+# OpenAI-Compatible Providers Setup Guide
+
+Open Notebook supports OpenAI-compatible API endpoints across all AI modalities (language models, embeddings, speech-to-text, and text-to-speech), giving you the flexibility to use popular tools like LM Studio, Text Generation WebUI, vLLM, and custom inference servers.
+
+## Why Choose OpenAI-Compatible Providers?
+
+- **🆓 Cost Flexibility**: Use free local inference or choose cost-effective cloud providers
+- **🔒 Privacy Control**: Run models locally or choose privacy-focused hosted services
+- **🎯 Model Selection**: Access to thousands of open-source models
+- **⚡ Performance Tuning**: Optimize inference for your specific hardware
+- **🔧 Full Control**: Deploy on your infrastructure with your configurations
+- **🌐 Universal Standard**: Works with any service that implements the OpenAI API specification
+
+## Quick Start
+
+### Basic Setup (All Modalities)
+
+**For LM Studio** (simplest):
+```bash
+# Start LM Studio and enable server mode on port 1234
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
+
+# Most LM Studio endpoints don't require an API key
+# export OPENAI_COMPATIBLE_API_KEY=not_needed
+```
+
+**For Text Generation WebUI**:
+```bash
+# Start with --api flag
+# python server.py --api --listen
+
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
+```
+
+**For vLLM**:
+```bash
+# Start vLLM server
+# vllm serve MODEL_NAME --port 8000
+
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
+```
+
+### Advanced Setup (Mode-Specific Endpoints)
+
+Use different endpoints for different capabilities:
+
+```bash
+# Language models on LM Studio
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+# Embeddings on a dedicated embedding server
+export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
+
+# Speech services on a different server
+export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
+export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
+```
+
+## Environment Variable Reference
+
+### Generic Configuration
+
+Use these when you want the same endpoint for all modalities:
+
+| Variable | Purpose | Required |
+|----------|---------|----------|
+| `OPENAI_COMPATIBLE_BASE_URL` | Base URL for all AI services | Yes (unless using mode-specific) |
+| `OPENAI_COMPATIBLE_API_KEY` | API key if endpoint requires auth | Optional |
+
+**Example:**
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
+export OPENAI_COMPATIBLE_API_KEY=your_key_here  # If needed
+```
+
+### Mode-Specific Configuration
+
+Use these when you want different endpoints for different capabilities:
+
+| Variable | Purpose | Modality |
+|----------|---------|----------|
+| `OPENAI_COMPATIBLE_BASE_URL_LLM` | Language model endpoint | Language models |
+| `OPENAI_COMPATIBLE_API_KEY_LLM` | API key for LLM endpoint | Language models |
+| `OPENAI_COMPATIBLE_BASE_URL_EMBEDDING` | Embedding model endpoint | Embeddings |
+| `OPENAI_COMPATIBLE_API_KEY_EMBEDDING` | API key for embedding endpoint | Embeddings |
+| `OPENAI_COMPATIBLE_BASE_URL_STT` | Speech-to-text endpoint | Speech-to-Text |
+| `OPENAI_COMPATIBLE_API_KEY_STT` | API key for STT endpoint | Speech-to-Text |
+| `OPENAI_COMPATIBLE_BASE_URL_TTS` | Text-to-speech endpoint | Text-to-Speech |
+| `OPENAI_COMPATIBLE_API_KEY_TTS` | API key for TTS endpoint | Text-to-Speech |
+
+**Precedence**: Mode-specific variables override the generic `OPENAI_COMPATIBLE_BASE_URL`
+
+**Example:**
+```bash
+# LLM on LM Studio
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+# Embeddings on dedicated server
+export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
+export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=secret_key_here
+```
+
+## Common Use Cases
+
+### LM Studio
+
+**What is LM Studio?**
+LM Studio is a desktop application for running large language models locally with a user-friendly interface.
+
+**Setup Steps:**
+1. **Download and install** LM Studio from [lmstudio.ai](https://lmstudio.ai/)
+2. **Download a model** (e.g., Llama 3, Qwen, Mistral)
+3. **Start the local server**:
+   - Go to the "Local Server" tab
+   - Click "Start Server"
+   - Note the port (default: 1234)
+
+4. **Configure Open Notebook**:
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
+```
+
+**What works:**
+- ✅ Language models (chat, completions)
+- ✅ Embeddings (with embedding models)
+- ❌ Speech-to-text (not supported)
+- ❌ Text-to-speech (not supported)
+
+**Tips:**
+- LM Studio doesn't require an API key
+- Choose quantized models (Q4, Q5) for better performance
+- Monitor RAM usage - larger models need more memory
+
+---
+
+### Text Generation WebUI (Oobabooga)
+
+**What is Text Generation WebUI?**
+A powerful Gradio-based web interface for running Large Language Models.
+
+**Setup Steps:**
+1. **Install** following [official instructions](https://github.com/oobabooga/text-generation-webui)
+2. **Download a model** using the UI or manually
+3. **Start with API mode**:
+```bash
+python server.py --api --listen
+```
+
+4. **Configure Open Notebook**:
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
+```
+
+**What works:**
+- ✅ Language models (excellent support)
+- ✅ Embeddings (with compatible models)
+- ❌ Speech services (not supported)
+
+**Tips:**
+- Use `--listen` to accept connections from Docker
+- Supports more model formats than LM Studio
+- Great for fine-tuned models
+
+---
+
+### vLLM
+
+**What is vLLM?**
+High-performance inference server optimized for serving large language models at scale.
+
+**Setup Steps:**
+1. **Install vLLM**:
+```bash
+pip install vllm
+```
+
+2. **Start the server**:
+```bash
+vllm serve meta-llama/Meta-Llama-3-8B-Instruct --port 8000
+```
+
+3. **Configure Open Notebook**:
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
+```
+
+**What works:**
+- ✅ Language models (optimized inference)
+- ✅ Embeddings (with embedding models)
+- ❌ Speech services (not supported)
+
+**Tips:**
+- Best performance for production deployments
+- Supports tensor parallelism for large models
+- Excellent for high-throughput scenarios
+
+---
+
+### Custom OpenAI-Compatible Services
+
+Many services implement the OpenAI API specification:
+
+**Examples:**
+- **Together AI**: Cloud-hosted models
+- **Anyscale Endpoints**: Ray-based inference
+- **Replicate**: Cloud model hosting
+- **LocalAI**: Self-hosted alternative to OpenAI
+- **FastChat**: Multi-model serving
+
+**Configuration:**
+```bash
+# Generic setup
+export OPENAI_COMPATIBLE_BASE_URL=https://api.your-service.com/v1
+export OPENAI_COMPATIBLE_API_KEY=your_api_key_here
+```
+
+## Configuration Scenarios
+
+### Scenario 1: Single Local Endpoint (Simplest)
+
+**Use Case**: Running one OpenAI-compatible server (e.g., LM Studio) for every modality it supports
+
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
+```
+
+**Result**:
+- ✅ Language models available
+- ✅ Embeddings available (if model supports)
+- ✅ Speech services available (if endpoint supports)
+- All use the same endpoint
+
+---
+
+### Scenario 2: Separate Endpoints per Modality
+
+**Use Case**: Language models on LM Studio, embeddings on dedicated server
+
+```bash
+# Language models on LM Studio
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+# Embeddings on specialized server
+export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
+export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key_here
+```
+
+**Result**:
+- ✅ Language models use LM Studio (port 1234)
+- ✅ Embeddings use specialized server (port 8080)
+- ❌ Speech services not available (not configured)
+
+---
+
+### Scenario 3: Mixed Local and Cloud
+
+**Use Case**: Local models for privacy, cloud for specialized tasks
+
+```bash
+# Local LLM (privacy-sensitive work)
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+# Cloud embeddings (better quality)
+export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=https://api.cloud-provider.com/v1
+export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=cloud_key_here
+
+# Cloud text-to-speech
+export OPENAI_COMPATIBLE_BASE_URL_TTS=https://api.cloud-provider.com/v1
+export OPENAI_COMPATIBLE_API_KEY_TTS=cloud_key_here
+```
+
+**Result**:
+- ✅ Sensitive chat stays local
+- ✅ High-quality embeddings from cloud
+- ✅ Professional TTS from cloud
+- 🔒 Privacy for conversations, cloud for non-sensitive features
+
+---
+
+### Scenario 4: Docker Deployment
+
+**Use Case**: Open Notebook in Docker, LM Studio on host machine
+
+**On macOS/Windows**:
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
+```
+
+**On Linux**:
+```bash
+# Use host networking or find host IP
+export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
+# or use --network host in docker run
+```
+
+**Important**:
+- LM Studio must be set to listen on `0.0.0.0`, not just `localhost`
+- In LM Studio settings, enable "Allow network connections"
+
+## Network Configuration
+
+### Docker Networking
+
+**Problem**: Docker containers can't reach `localhost` on the host
+
+**Solutions:**
+
+**Option 1: Use `host.docker.internal` (Mac/Windows)**
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
+```
+
+**Option 2: Use host IP address (Linux)**
+```bash
+# Find host IP
+ip addr show docker0 | grep inet
+
+# Use in environment
+export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
+```
+
+**Option 3: Host networking (Linux only)**
+```bash
+docker run --network host \
+  -v ./notebook_data:/app/data \
+  -e OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 \
+  lfnovo/open_notebook:v1-latest-single
+```
+
+### Remote Servers
+
+**Use Case**: OpenAI-compatible service on a different machine
+
+```bash
+# Replace with your server's IP or hostname
+export OPENAI_COMPATIBLE_BASE_URL=http://192.168.1.100:1234/v1
+```
+
+**Security Notes:**
+- ⚠️ Only use on trusted networks
+- Consider using HTTPS for production
+- Implement API key authentication if possible
+- Use firewall rules to restrict access
+
+### Port Conflicts
+
+**Problem**: Default port (1234) is already in use
+
+**Solution**: Change the port in your inference server
+
+**LM Studio:**
+- Settings → Local Server → Port → Change to different port
+
+**Then update environment:**
+```bash
+export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8888/v1
+```
+
+## Troubleshooting
+
+### Connection Refused
+
+**Symptom**: "Connection refused" or "Could not connect to endpoint"
+
+**Solutions:**
+1. **Verify server is running**:
+   ```bash
+   curl http://localhost:1234/v1/models
+   ```
+
+2. **Check firewall settings**: Ensure the port is not blocked
+
+3. **For Docker**: Use `host.docker.internal` instead of `localhost`
+
+4. **Check server binding**: Server must listen on `0.0.0.0`, not just `127.0.0.1`
+
+---
+
+### Models Not Found
+
+**Symptom**: "Model not found" or "No models available"
+
+**Solutions:**
+1. **Verify model is loaded** in your inference server
+2. **Check model name** matches what Open Notebook expects
+3. **For LM Studio**: Ensure model is loaded in the local server tab
+4. **Test endpoint**:
+   ```bash
+   curl http://localhost:1234/v1/models
+   ```
+
+---
+
+### Slow Performance
+
+**Symptom**: Responses take a long time
+
+**Solutions:**
+1. **Use quantized models** (Q4, Q5 instead of full precision)
+2. **Check RAM usage**: Model might be swapping to disk
+3. **Reduce context length**: Smaller context = faster inference
+4. **Enable GPU acceleration**: If available
+5. **For vLLM**: Enable tensor parallelism for large models
+
+---
+
+### Authentication Errors
+
+**Symptom**: "Unauthorized" or "Invalid API key"
+
+**Solutions:**
+1. **Set API key** if your endpoint requires it:
+   ```bash
+   export OPENAI_COMPATIBLE_API_KEY=your_key_here
+   ```
+
+2. **Check key validity**: Test with curl:
+   ```bash
+   curl -H "Authorization: Bearer YOUR_KEY" \
+     http://localhost:1234/v1/models
+   ```
+
+3. **For mode-specific**: Use the correct key variable:
+   ```bash
+   export OPENAI_COMPATIBLE_API_KEY_LLM=llm_key
+   export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key
+   ```
+
+---
+
+### Docker Can't Reach Host
+
+**Symptom**: Connection works locally but not from Docker
+
+**Solutions:**
+1. **Use `host.docker.internal`** (Mac/Windows):
+   ```bash
+   export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
+   ```
+
+2. **On Linux**: Use host IP or `--network host`
+
+3. **Check server listening**: Must listen on `0.0.0.0:1234`, not `127.0.0.1:1234`
+
+4. **Test from inside container**:
+   ```bash
+   docker exec -it open-notebook curl http://host.docker.internal:1234/v1/models
+   ```
+
+---
+
+### Embeddings Not Working
+
+**Symptom**: Search or embeddings fail
+
+**Solutions:**
+1. **Verify embedding model is loaded**: Many inference servers need explicit embedding model setup
+2. **Use dedicated embedding endpoint**: If available
+3. **Check model compatibility**: Not all models support embeddings
+4. **For LM Studio**: Load an embedding model separately
+
+---
+
+### Mixed Results (Some Modes Work, Others Don't)
+
+**Symptom**: Language models work, but embeddings or speech don't
+
+**Solution**: Use mode-specific configuration:
+```bash
+# What works
+export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+# For embeddings, use a different provider
+export OPENAI_API_KEY=your_openai_key  # Fallback to OpenAI for embeddings
+```
+
+## Best Practices
+
+### Security
+
+1. **API Keys**:
+   - Use environment variables, never hardcode
+   - Rotate keys regularly for cloud services
+   - Use different keys for different services
+
+2. **Network**:
+   - Only expose on trusted networks
+   - Use HTTPS in production
+   - Implement firewall rules
+
+3. **Data Privacy**:
+   - Use local models for sensitive data
+   - Check service privacy policies
+   - Understand data retention policies
+
+### Performance
+
+1. **Model Selection**:
+   - Quantized models (Q4, Q5) for better speed/memory trade-off
+   - Smaller models for simple tasks
+   - Larger models only when needed
+
+2. **Resource Management**:
+   - Monitor RAM and GPU usage
+   - Use appropriate batch sizes
+   - Consider model caching strategies
+
+3. **Network**:
+   - Use local endpoints when possible for lower latency
+   - For cloud: Choose geographically close servers
+
+### Reliability
+
+1. **Fallback Strategy**:
+   ```bash
+   # Primary: Local LLM
+   export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
+
+   # Fallback: Use OpenAI if local is unavailable
+   export OPENAI_API_KEY=your_backup_key
+   ```
+
+2. **Health Checks**:
+   - Periodically test endpoints
+   - Monitor server status
+   - Set up alerts for downtime
+
+3. **Testing**:
+   - Test configuration before production
+   - Validate all required modalities work
+   - Check error handling
+
+## Getting Help
+
+**Community Resources:**
+- [Open Notebook Discord](https://discord.gg/37XJPXfz2w) - Get help with Open Notebook integration
+- [LM Studio Discord](https://discord.gg/lmstudio) - LM Studio-specific support
+- [Text Generation WebUI GitHub](https://github.com/oobabooga/text-generation-webui) - Issues and discussions
+
+**Debugging Steps:**
+1. **Test endpoint directly** with curl before configuring Open Notebook
+2. **Check Open Notebook logs** for detailed error messages
+3. **Verify environment variables** are set correctly
+4. **Test with simple requests** first (list models, simple completion)
+
+**Common curl tests:**
+```bash
+# List models
+curl http://localhost:1234/v1/models
+
+# Test completion
+curl http://localhost:1234/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "your-model",
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+
+# Test embeddings
+curl http://localhost:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "embedding-model",
+    "input": "Test text"
+  }'
+```
+
+This guide should help you successfully configure OpenAI-compatible providers with Open Notebook. For general AI model configuration, see the [AI Models Guide](ai-models.md).
\ No newline at end of file
diff --git a/tests/test_models_api.py b/tests/test_models_api.py
new file mode 100644
index 0000000..91ac3e7
--- /dev/null
+++ b/tests/test_models_api.py
@@ -0,0 +1,279 @@
+from unittest.mock import patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from api.main import app
+
+client = TestClient(app)
+
+
+class TestModelsProviderAvailability:
+    """Tests for GET /api/models/providers; env stubs mirror os.environ.get(key, default)."""
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_generic_env_var_enables_all_modes(self, mock_esperanto, mock_env):
+        """Test that OPENAI_COMPATIBLE_BASE_URL enables all 4 modes."""
+
+        # Mock environment: only generic var is set; unmatched keys yield the caller's default
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL":
+                return "http://localhost:1234/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should be available
+        assert "openai-compatible" in data["available"]
+
+        # Should support all 4 types
+        assert "openai-compatible" in data["supported_types"]
+        supported = data["supported_types"]["openai-compatible"]
+        assert "language" in supported
+        assert "embedding" in supported
+        assert "speech_to_text" in supported
+        assert "text_to_speech" in supported
+        assert len(supported) == 4
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_mode_specific_env_vars_llm_embedding(self, mock_esperanto, mock_env):
+        """Test mode-specific env vars (LLM + EMBEDDING) enable only those 2 modes."""
+
+        # Mock environment: only LLM and EMBEDDING specific vars are set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL_LLM":
+                return "http://localhost:1234/v1"
+            if key == "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING":
+                return "http://localhost:8080/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should be available
+        assert "openai-compatible" in data["available"]
+
+        # Should support only language and embedding
+        assert "openai-compatible" in data["supported_types"]
+        supported = data["supported_types"]["openai-compatible"]
+        assert "language" in supported
+        assert "embedding" in supported
+        assert "speech_to_text" not in supported
+        assert "text_to_speech" not in supported
+        assert len(supported) == 2
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_no_env_vars_set(self, mock_esperanto, mock_env):
+        """Test that openai-compatible is not available when no env vars are set."""
+
+        # Mock environment: no openai-compatible vars are set
+        def env_side_effect(key, default=None):
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should NOT be available
+        assert "openai-compatible" not in data["available"]
+        assert "openai-compatible" in data["unavailable"]
+
+        # Should not have supported_types entry
+        assert "openai-compatible" not in data["supported_types"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_mixed_config_generic_and_mode_specific(self, mock_esperanto, mock_env):
+        """Test mixed config: generic + mode-specific (generic should enable all)."""
+
+        # Mock environment: both generic and mode-specific vars are set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL":
+                return "http://localhost:1234/v1"
+            if key == "OPENAI_COMPATIBLE_BASE_URL_LLM":
+                return "http://localhost:5678/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should be available
+        assert "openai-compatible" in data["available"]
+
+        # Generic var enables all, so all 4 should be supported
+        assert "openai-compatible" in data["supported_types"]
+        supported = data["supported_types"]["openai-compatible"]
+        assert "language" in supported
+        assert "embedding" in supported
+        assert "speech_to_text" in supported
+        assert "text_to_speech" in supported
+        assert len(supported) == 4
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_llm_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (LLM only)."""
+
+        # Mock environment: only LLM specific var is set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL_LLM":
+                return "http://localhost:1234/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only language
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["language"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_embedding_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (EMBEDDING only)."""
+
+        # Mock environment: only EMBEDDING specific var is set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING":
+                return "http://localhost:8080/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only embedding
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["embedding"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_stt_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (STT only)."""
+
+        # Mock environment: only STT specific var is set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL_STT":
+                return "http://localhost:9000/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only speech_to_text
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["speech_to_text"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_tts_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (TTS only)."""
+
+        # Mock environment: only TTS specific var is set
+        def env_side_effect(key, default=None):
+            if key == "OPENAI_COMPATIBLE_BASE_URL_TTS":
+                return "http://localhost:9000/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only text_to_speech
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["text_to_speech"]
diff --git a/uv.lock b/uv.lock
index e5a0971..40956ff 100644
--- a/uv.lock
+++ b/uv.lock
@@ -620,15 +620,15 @@ wheels = [
 
 [[package]]
 name = "esperanto"
-version = "2.6.0"
+version = "2.7.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ec/a6/088c04b8df5c89d09620869647271ef480a855734d7b17f78fcdb7f183d2/esperanto-2.6.0.tar.gz", hash = "sha256:49ae83650812ddf32e8a5b54229b5bb8393b8a0b866c77ae7e264e2adc9231a7", size = 535743, upload-time = "2025-09-26T21:51:52.844Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/cf/0da02a603a63b3850abd14d23629f101942db5c18840b0cc6f34d7db9a04/esperanto-2.7.0.tar.gz", hash = "sha256:3861e4e20697813b19f0070a1142934bd6792077c3c174a2c3dd4b6ca0676b06", size = 553433, upload-time = "2025-10-19T02:04:30.21Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/04/d6/32b84cbeac1234f2e77c920b6d07e574ee6bfa3675797bb8bd76f36e7e0f/esperanto-2.6.0-py3-none-any.whl", hash = "sha256:063108274966e8e9bc19b844740ddf7646dd4bc5f6b1b7c586cac37947ffeab0", size = 129234, upload-time = "2025-09-26T21:51:51.159Z" },
+    { url = "https://files.pythonhosted.org/packages/14/9c/79827f246965ed66ae8d2f3e3937e552730eaf48b270dac852a4756c7bf4/esperanto-2.7.0-py3-none-any.whl", hash = "sha256:2ea3fa98d8622d08a18dc6701ad362461de02492a3252326c70c969b3aba3db6", size = 129524, upload-time = "2025-10-19T02:04:28.57Z" },
 ]
 
 [[package]]