From 4c2b8257fcf5f7cfb40eed7e14a66cc60e999ddd Mon Sep 17 00:00:00 2001 From: Luis Novo Date: Sun, 19 Oct 2025 07:44:05 -0300 Subject: [PATCH] OpenAI compatible multimodal (#167) * fix text * remove lint from docker publish workflow * gemini base url docs * feat: add multimodal support for openai-compatible providers - Add helper function to check OpenAI-compatible provider availability per mode - Update provider detection to support language, embedding, STT, and TTS modalities - Implement mode-specific environment variable detection (LLM, EMBEDDING, STT, TTS) - Maintain backward compatibility with generic OPENAI_COMPATIBLE_BASE_URL - Add comprehensive unit tests for all configuration scenarios - Update .env.example with mode-specific environment variables - Update provider support matrix in ai-models.md - Create comprehensive openai-compatible.md setup guide This enables users to configure different OpenAI-compatible endpoints for different AI capabilities (e.g., LM Studio for language models, dedicated server for embeddings) while maintaining full backward compatibility. 
* upgrade * chore: change docker release strategy --- .dockerignore | 6 + .env.example | 15 +- .github/workflows/build-and-release.yml | 20 +- Makefile | 4 + api/routers/models.py | 48 +- docs/features/ai-models.md | 25 +- docs/features/openai-compatible.md | 568 ++++++++++++++++++++++++ tests/test_models_api.py | 279 ++++++++++++ uv.lock | 6 +- 9 files changed, 943 insertions(+), 28 deletions(-) create mode 100644 docs/features/openai-compatible.md create mode 100644 tests/test_models_api.py diff --git a/.dockerignore b/.dockerignore index ef72e9d..cc482b3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,6 +14,8 @@ surreal-data/ notebook_data/ temp/ *.env +.git/ +.github/ # Frontend build artifacts and dependencies frontend/node_modules/ @@ -55,3 +57,7 @@ coverage.xml .Trashes ehthumbs.db Thumbs.db + + +.quarentena/ +surreal_single_data/ \ No newline at end of file diff --git a/.env.example b/.env.example index 276740a..e2300a0 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,7 @@ API_URL=http://localhost:5055 # GEMINI # this is the best model for long context and podcast generation # GOOGLE_API_KEY= +# GEMINI_API_BASE_URL= # Optional: Override default endpoint (for Vertex AI, proxies, etc.) 
# VERTEXAI # VERTEX_PROJECT=my-google-cloud-project-name @@ -57,10 +58,22 @@ API_URL=http://localhost:5055 # VOYAGE AI # VOYAGE_API_KEY= -# OPEN AI COMPATIBLE ENDPOINTS +# OPENAI COMPATIBLE ENDPOINTS +# Generic configuration (applies to all modalities: language, embedding, STT, TTS) # OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 # OPENAI_COMPATIBLE_API_KEY= +# Mode-specific configuration (overrides generic if set) +# Use these when you want different endpoints for different capabilities +# OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 +# OPENAI_COMPATIBLE_API_KEY_LLM= +# OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1 +# OPENAI_COMPATIBLE_API_KEY_EMBEDDING= +# OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1 +# OPENAI_COMPATIBLE_API_KEY_STT= +# OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1 +# OPENAI_COMPATIBLE_API_KEY_TTS= + # AZURE OPENAI # AZURE_OPENAI_API_KEY= # AZURE_OPENAI_ENDPOINT= diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 79e50f3..dd8694d 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -3,19 +3,10 @@ name: Build and Release on: workflow_dispatch: inputs: - build_type: - description: 'Build type to create' - required: true - default: 'both' - type: choice - options: - - both - - regular - - single push_latest: - description: 'Also push latest tags' - required: false - default: true + description: 'Also push v1-latest tags' + required: true + default: false type: boolean release: types: [published] @@ -59,7 +50,6 @@ jobs: build-regular: needs: extract-version runs-on: ubuntu-latest - if: github.event.inputs.build_type == 'regular' || github.event.inputs.build_type == 'both' || github.event_name == 'release' steps: - name: Checkout uses: actions/checkout@v4 @@ -145,7 +135,6 @@ jobs: build-single: needs: extract-version runs-on: ubuntu-latest - if: github.event.inputs.build_type == 'single' || 
github.event.inputs.build_type == 'both' || github.event_name == 'release' steps: - name: Checkout uses: actions/checkout@v4 @@ -237,8 +226,7 @@ jobs: run: | echo "## Build Summary" >> $GITHUB_STEP_SUMMARY echo "**Version:** ${{ needs.extract-version.outputs.version }}" >> $GITHUB_STEP_SUMMARY - echo "**Build Type:** ${{ github.event.inputs.build_type || 'both' }}" >> $GITHUB_STEP_SUMMARY - echo "**Push Latest:** ${{ github.event.inputs.push_latest || 'true' }}" >> $GITHUB_STEP_SUMMARY + echo "**Push v1-Latest:** ${{ github.event.inputs.push_latest || 'false' }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "### Registries:" >> $GITHUB_STEP_SUMMARY echo "✅ **GHCR:** \`${{ env.GHCR_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY diff --git a/Makefile b/Makefile index 0aa631a..7fabe78 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ docker-push: docker-buildx-prepare @echo "🔨 Building regular image..." docker buildx build --pull \ --platform $(PLATFORMS) \ + --progress=plain \ -t $(DOCKERHUB_IMAGE):$(VERSION) \ -t $(GHCR_IMAGE):$(VERSION) \ --push \ @@ -58,6 +59,7 @@ docker-push: docker-buildx-prepare @echo "🔨 Building single-container image..." docker buildx build --pull \ --platform $(PLATFORMS) \ + --progress=plain \ -f Dockerfile.single \ -t $(DOCKERHUB_IMAGE):$(VERSION)-single \ -t $(GHCR_IMAGE):$(VERSION)-single \ @@ -77,6 +79,7 @@ docker-push-latest: docker-buildx-prepare @echo "🔨 Building regular image with latest tag..." docker buildx build --pull \ --platform $(PLATFORMS) \ + --progress=plain \ -t $(DOCKERHUB_IMAGE):$(VERSION) \ -t $(DOCKERHUB_IMAGE):v1-latest \ -t $(GHCR_IMAGE):$(VERSION) \ @@ -86,6 +89,7 @@ docker-push-latest: docker-buildx-prepare @echo "🔨 Building single-container image with latest tag..." 
docker buildx build --pull \ --platform $(PLATFORMS) \ + --progress=plain \ -f Dockerfile.single \ -t $(DOCKERHUB_IMAGE):$(VERSION)-single \ -t $(DOCKERHUB_IMAGE):v1-latest-single \ diff --git a/api/routers/models.py b/api/routers/models.py index 9a5f725..ff2eb28 100644 --- a/api/routers/models.py +++ b/api/routers/models.py @@ -17,6 +17,21 @@ from open_notebook.exceptions import InvalidInputError router = APIRouter() +def _check_openai_compatible_support(mode: str) -> bool: + """ + Check if OpenAI-compatible provider is available for a specific mode. + + Args: + mode: One of 'LLM', 'EMBEDDING', 'STT', 'TTS' + + Returns: + bool: True if either generic or mode-specific env var is set + """ + generic = os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None + specific = os.environ.get(f"OPENAI_COMPATIBLE_BASE_URL_{mode}") is not None + return generic or specific + + @router.get("/models", response_model=List[ModelResponse]) async def get_models( type: Optional[str] = Query(None, description="Filter by model type") @@ -191,22 +206,43 @@ async def get_provider_availability(): ), "mistral": os.environ.get("MISTRAL_API_KEY") is not None, "deepseek": os.environ.get("DEEPSEEK_API_KEY") is not None, - "openai-compatible": os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None, + "openai-compatible": ( + _check_openai_compatible_support("LLM") + or _check_openai_compatible_support("EMBEDDING") + or _check_openai_compatible_support("STT") + or _check_openai_compatible_support("TTS") + ), } available_providers = [k for k, v in provider_status.items() if v] unavailable_providers = [k for k, v in provider_status.items() if not v] - + # Get supported model types from Esperanto esperanto_available = AIFactory.get_available_providers() - + # Build supported types mapping only for available providers supported_types: dict[str, list[str]] = {} for provider in available_providers: supported_types[provider] = [] - for model_type, providers in esperanto_available.items(): - if 
provider in providers: - supported_types[provider].append(model_type) + + # Special handling for openai-compatible to check mode-specific availability + if provider == "openai-compatible": + # Map Esperanto model types to our environment variable modes + mode_mapping = { + "language": "LLM", + "embedding": "EMBEDDING", + "speech_to_text": "STT", + "text_to_speech": "TTS", + } + for model_type, mode in mode_mapping.items(): + if model_type in esperanto_available and provider in esperanto_available[model_type]: + if _check_openai_compatible_support(mode): + supported_types[provider].append(model_type) + else: + # Standard provider detection + for model_type, providers in esperanto_available.items(): + if provider in providers: + supported_types[provider].append(model_type) return ProviderAvailabilityResponse( available=available_providers, diff --git a/docs/features/ai-models.md b/docs/features/ai-models.md index 2d60e1a..b0ed875 100644 --- a/docs/features/ai-models.md +++ b/docs/features/ai-models.md @@ -72,7 +72,7 @@ Open Notebook uses four distinct types of AI models, each optimized for specific | **Azure OpenAI** | ✅ | ✅ | ❌ | ❌ | | **OpenRouter** | ✅ | ❌ | ❌ | ❌ | | **Perplexity** | ✅ | ❌ | ❌ | ❌ | -| **OpenAI Compatible** | ✅ | ❌ | ❌ | ❌ | +| **OpenAI Compatible** | ✅ | ✅ | ✅ | ✅ | ## Model Selection Guide @@ -103,6 +103,10 @@ Open Notebook uses four distinct types of AI models, each optimized for specific **Environment Setup** ```bash export GEMINI_API_KEY=your_api_key_here + +# Optional: Override the default Gemini API endpoint +# Use this for Vertex AI, custom proxies, or alternative endpoints +# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com ``` **Recommended Models** @@ -321,22 +325,32 @@ export VOYAGE_API_KEY=your_api_key_here --- ### 🔧 OpenAI Compatible (LM Studio & Others) -**Best for**: Using any OpenAI-compatible API endpoint, including LM Studio +**Best for**: Using any OpenAI-compatible API endpoint for all AI modalities, including LM 
Studio **Environment Setup** ```bash +# Generic configuration (applies to all modalities) export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 # Optional - only if your endpoint requires authentication export OPENAI_COMPATIBLE_API_KEY=your_key_here + +# Mode-specific configuration (for different endpoints per modality) +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 +export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1 +export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1 +export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1 ``` **Common Use Cases** - **LM Studio**: Run models locally with a familiar UI - **Text Generation WebUI**: Alternative local inference +- **vLLM**: High-performance inference server - **Custom Endpoints**: Any OpenAI-compatible API **Strengths** - Use any OpenAI-compatible endpoint +- **NEW**: Full support for all 4 modalities (language, embeddings, STT, TTS) +- Configure different endpoints for different capabilities - Perfect for LM Studio users - Flexibility in model deployment - Works with local and remote endpoints @@ -346,6 +360,8 @@ export OPENAI_COMPATIBLE_API_KEY=your_key_here - Model availability varies by endpoint - Some endpoints may not support all features +> **📖 Need detailed setup help?** Check our comprehensive [OpenAI-Compatible Setup Guide](openai-compatible.md) for LM Studio, Text Generation WebUI, vLLM, and other configurations. + ## 🧠 Reasoning Models Open Notebook fully supports **reasoning models** that show their transparent thinking process. These models output their internal reasoning within `<think>` tags, which Open Notebook automatically handles. @@ -490,6 +506,7 @@ Set up your API keys using environment variables. 
Here's the complete list: export OPENAI_API_KEY=your_key export ANTHROPIC_API_KEY=your_key export GEMINI_API_KEY=your_key +export GEMINI_API_BASE_URL=https://custom-endpoint.com # Optional # Additional Language Providers export MISTRAL_API_KEY=your_key @@ -569,10 +586,14 @@ export ANTHROPIC_API_KEY=sk-ant-your-key-here #### Google (Gemini) ```bash export GEMINI_API_KEY=your-key-here + +# Optional: Custom API endpoint (for Vertex AI, proxies, etc.) +# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com ``` - Get your API key from [Google AI Studio](https://makersuite.google.com/app/apikey) - Excellent for large context and TTS - Cost-effective option +- Supports custom endpoints via `GEMINI_API_BASE_URL` for advanced deployments #### Ollama (Local) ```bash diff --git a/docs/features/openai-compatible.md b/docs/features/openai-compatible.md new file mode 100644 index 0000000..9bcd974 --- /dev/null +++ b/docs/features/openai-compatible.md @@ -0,0 +1,568 @@ +# OpenAI-Compatible Providers Setup Guide + +Open Notebook supports OpenAI-compatible API endpoints across all AI modalities (language models, embeddings, speech-to-text, and text-to-speech), giving you the flexibility to use popular tools like LM Studio, Text Generation WebUI, vLLM, and custom inference servers. + +## Why Choose OpenAI-Compatible Providers? 
+ +- **🆓 Cost Flexibility**: Use free local inference or choose cost-effective cloud providers +- **🔒 Privacy Control**: Run models locally or choose privacy-focused hosted services +- **🎯 Model Selection**: Access to thousands of open-source models +- **⚡ Performance Tuning**: Optimize inference for your specific hardware +- **🔧 Full Control**: Deploy on your infrastructure with your configurations +- **🌐 Universal Standard**: Works with any service that implements the OpenAI API specification + +## Quick Start + +### Basic Setup (All Modalities) + +**For LM Studio** (simplest): +```bash +# Start LM Studio and enable server mode on port 1234 +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 + +# Most LM Studio endpoints don't require an API key +# export OPENAI_COMPATIBLE_API_KEY=not_needed +``` + +**For Text Generation WebUI**: +```bash +# Start with --api flag +# python server.py --api --listen + +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1 +``` + +**For vLLM**: +```bash +# Start vLLM server +# vllm serve MODEL_NAME --port 8000 + +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1 +``` + +### Advanced Setup (Mode-Specific Endpoints) + +Use different endpoints for different capabilities: + +```bash +# Language models on LM Studio +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + +# Embeddings on a dedicated embedding server +export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1 + +# Speech services on a different server +export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1 +export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1 +``` + +## Environment Variable Reference + +### Generic Configuration + +Use these when you want the same endpoint for all modalities: + +| Variable | Purpose | Required | +|----------|---------|----------| +| `OPENAI_COMPATIBLE_BASE_URL` | Base URL for all AI services | Yes (unless using mode-specific) | +| `OPENAI_COMPATIBLE_API_KEY` | API key if endpoint 
requires auth | Optional | + +**Example:** +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 +export OPENAI_COMPATIBLE_API_KEY=your_key_here # If needed +``` + +### Mode-Specific Configuration + +Use these when you want different endpoints for different capabilities: + +| Variable | Purpose | Modality | +|----------|---------|----------| +| `OPENAI_COMPATIBLE_BASE_URL_LLM` | Language model endpoint | Language models | +| `OPENAI_COMPATIBLE_API_KEY_LLM` | API key for LLM endpoint | Language models | +| `OPENAI_COMPATIBLE_BASE_URL_EMBEDDING` | Embedding model endpoint | Embeddings | +| `OPENAI_COMPATIBLE_API_KEY_EMBEDDING` | API key for embedding endpoint | Embeddings | +| `OPENAI_COMPATIBLE_BASE_URL_STT` | Speech-to-text endpoint | Speech-to-Text | +| `OPENAI_COMPATIBLE_API_KEY_STT` | API key for STT endpoint | Speech-to-Text | +| `OPENAI_COMPATIBLE_BASE_URL_TTS` | Text-to-speech endpoint | Text-to-Speech | +| `OPENAI_COMPATIBLE_API_KEY_TTS` | API key for TTS endpoint | Text-to-Speech | + +**Precedence**: Mode-specific variables override the generic `OPENAI_COMPATIBLE_BASE_URL` + +**Example:** +```bash +# LLM on LM Studio +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + +# Embeddings on dedicated server +export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1 +export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=secret_key_here +``` + +## Common Use Cases + +### LM Studio + +**What is LM Studio?** +LM Studio is a desktop application for running large language models locally with a user-friendly interface. + +**Setup Steps:** +1. **Download and install** LM Studio from [lmstudio.ai](https://lmstudio.ai/) +2. **Download a model** (e.g., Llama 3, Qwen, Mistral) +3. **Start the local server**: + - Go to the "Local Server" tab + - Click "Start Server" + - Note the port (default: 1234) + +4. 
**Configure Open Notebook**: +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 +``` + +**What works:** +- ✅ Language models (chat, completions) +- ✅ Embeddings (with embedding models) +- ❌ Speech-to-text (not supported) +- ❌ Text-to-speech (not supported) + +**Tips:** +- LM Studio doesn't require an API key +- Choose quantized models (Q4, Q5) for better performance +- Monitor RAM usage - larger models need more memory + +--- + +### Text Generation WebUI (Oobabooga) + +**What is Text Generation WebUI?** +A powerful Gradio-based web interface for running Large Language Models. + +**Setup Steps:** +1. **Install** following [official instructions](https://github.com/oobabooga/text-generation-webui) +2. **Download a model** using the UI or manually +3. **Start with API mode**: +```bash +python server.py --api --listen +``` + +4. **Configure Open Notebook**: +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1 +``` + +**What works:** +- ✅ Language models (excellent support) +- ✅ Embeddings (with compatible models) +- ❌ Speech services (not supported) + +**Tips:** +- Use `--listen` to accept connections from Docker +- Supports more model formats than LM Studio +- Great for fine-tuned models + +--- + +### vLLM + +**What is vLLM?** +High-performance inference server optimized for serving large language models at scale. + +**Setup Steps:** +1. **Install vLLM**: +```bash +pip install vllm +``` + +2. **Start the server**: +```bash +vllm serve meta-llama/Llama-3-8B-Instruct --port 8000 +``` + +3. 
**Configure Open Notebook**: +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1 +``` + +**What works:** +- ✅ Language models (optimized inference) +- ✅ Embeddings (with embedding models) +- ❌ Speech services (not supported) + +**Tips:** +- Best performance for production deployments +- Supports tensor parallelism for large models +- Excellent for high-throughput scenarios + +--- + +### Custom OpenAI-Compatible Services + +Many services implement the OpenAI API specification: + +**Examples:** +- **Together AI**: Cloud-hosted models +- **Anyscale Endpoints**: Ray-based inference +- **Replicate**: Cloud model hosting +- **LocalAI**: Self-hosted alternative to OpenAI +- **FastChat**: Multi-model serving + +**Configuration:** +```bash +# Generic setup +export OPENAI_COMPATIBLE_BASE_URL=https://api.your-service.com/v1 +export OPENAI_COMPATIBLE_API_KEY=your_api_key_here +``` + +## Configuration Scenarios + +### Scenario 1: Single Local Endpoint (Simplest) + +**Use Case**: Running a single local server (such as LM Studio) as the endpoint for every modality + +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 +``` + +**Result**: +- ✅ Language models available +- ✅ Embeddings available (if the loaded model supports them) +- ✅ Speech services available (if the endpoint supports them) +- All use the same endpoint + +--- + +### Scenario 2: Separate Endpoints per Modality + +**Use Case**: Language models on LM Studio, embeddings on dedicated server + +```bash +# Language models on LM Studio +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + +# Embeddings on specialized server +export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1 +export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key_here +``` + +**Result**: +- ✅ Language models use LM Studio (port 1234) +- ✅ Embeddings use specialized server (port 8080) +- ❌ Speech services not available (not configured) + +--- + +### Scenario 3: Mixed Local and Cloud + +**Use Case**: Local models for privacy, cloud for specialized tasks + 
+```bash +# Local LLM (privacy-sensitive work) +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + +# Cloud embeddings (better quality) +export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=https://api.cloud-provider.com/v1 +export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=cloud_key_here + +# Cloud speech services +export OPENAI_COMPATIBLE_BASE_URL_TTS=https://api.cloud-provider.com/v1 +export OPENAI_COMPATIBLE_API_KEY_TTS=cloud_key_here +``` + +**Result**: +- ✅ Sensitive chat stays local +- ✅ High-quality embeddings from cloud +- ✅ Professional TTS from cloud +- 🔒 Privacy for conversations, cloud for non-sensitive features + +--- + +### Scenario 4: Docker Deployment + +**Use Case**: Open Notebook in Docker, LM Studio on host machine + +**On macOS/Windows**: +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1 +``` + +**On Linux**: +```bash +# Use host networking or find host IP +export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1 +# or use --network host in docker run +``` + +**Important**: +- LM Studio must be set to listen on `0.0.0.0`, not just `localhost` +- In LM Studio settings, enable "Allow network connections" + +## Network Configuration + +### Docker Networking + +**Problem**: Docker containers can't reach `localhost` on the host + +**Solutions:** + +**Option 1: Use `host.docker.internal` (Mac/Windows)** +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1 +``` + +**Option 2: Use host IP address (Linux)** +```bash +# Find host IP +ip addr show docker0 | grep inet + +# Use in environment +export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1 +``` + +**Option 3: Host networking (Linux only)** +```bash +docker run --network host \ + -v ./notebook_data:/app/data \ + -e OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 \ + lfnovo/open_notebook:v1-latest-single +``` + +### Remote Servers + +**Use Case**: OpenAI-compatible service on a different machine + +```bash +# Replace with your server's 
IP or hostname +export OPENAI_COMPATIBLE_BASE_URL=http://192.168.1.100:1234/v1 +``` + +**Security Notes:** +- ⚠️ Only use on trusted networks +- Consider using HTTPS for production +- Implement API key authentication if possible +- Use firewall rules to restrict access + +### Port Conflicts + +**Problem**: Default port (1234) is already in use + +**Solution**: Change the port in your inference server + +**LM Studio:** +- Settings → Local Server → Port → Change to different port + +**Then update environment:** +```bash +export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8888/v1 +``` + +## Troubleshooting + +### Connection Refused + +**Symptom**: "Connection refused" or "Could not connect to endpoint" + +**Solutions:** +1. **Verify server is running**: + ```bash + curl http://localhost:1234/v1/models + ``` + +2. **Check firewall settings**: Ensure the port is not blocked + +3. **For Docker**: Use `host.docker.internal` instead of `localhost` + +4. **Check server binding**: Server must listen on `0.0.0.0`, not just `127.0.0.1` + +--- + +### Models Not Found + +**Symptom**: "Model not found" or "No models available" + +**Solutions:** +1. **Verify model is loaded** in your inference server +2. **Check model name** matches what Open Notebook expects +3. **For LM Studio**: Ensure model is loaded in the local server tab +4. **Test endpoint**: + ```bash + curl http://localhost:1234/v1/models + ``` + +--- + +### Slow Performance + +**Symptom**: Responses take a long time + +**Solutions:** +1. **Use quantized models** (Q4, Q5 instead of full precision) +2. **Check RAM usage**: Model might be swapping to disk +3. **Reduce context length**: Smaller context = faster inference +4. **Enable GPU acceleration**: If available +5. **For vLLM**: Enable tensor parallelism for large models + +--- + +### Authentication Errors + +**Symptom**: "Unauthorized" or "Invalid API key" + +**Solutions:** +1. 
**Set API key** if your endpoint requires it: + ```bash + export OPENAI_COMPATIBLE_API_KEY=your_key_here + ``` + +2. **Check key validity**: Test with curl: + ```bash + curl -H "Authorization: Bearer YOUR_KEY" \ + http://localhost:1234/v1/models + ``` + +3. **For mode-specific**: Use the correct key variable: + ```bash + export OPENAI_COMPATIBLE_API_KEY_LLM=llm_key + export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key + ``` + +--- + +### Docker Can't Reach Host + +**Symptom**: Connection works locally but not from Docker + +**Solutions:** +1. **Use `host.docker.internal`** (Mac/Windows): + ```bash + export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1 + ``` + +2. **On Linux**: Use host IP or `--network host` + +3. **Check server listening**: Must listen on `0.0.0.0:1234`, not `127.0.0.1:1234` + +4. **Test from inside container**: + ```bash + docker exec -it open-notebook curl http://host.docker.internal:1234/v1/models + ``` + +--- + +### Embeddings Not Working + +**Symptom**: Search or embeddings fail + +**Solutions:** +1. **Verify embedding model is loaded**: Many inference servers need explicit embedding model setup +2. **Use dedicated embedding endpoint**: If available +3. **Check model compatibility**: Not all models support embeddings +4. **For LM Studio**: Load an embedding model separately + +--- + +### Mixed Results (Some Modes Work, Others Don't) + +**Symptom**: Language models work, but embeddings or speech don't + +**Solution**: Use mode-specific configuration: +```bash +# What works +export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + +# For embeddings, use a different provider +export OPENAI_API_KEY=your_openai_key # Fallback to OpenAI for embeddings +``` + +## Best Practices + +### Security + +1. **API Keys**: + - Use environment variables, never hardcode + - Rotate keys regularly for cloud services + - Use different keys for different services + +2. 
**Network**: + - Only expose on trusted networks + - Use HTTPS in production + - Implement firewall rules + +3. **Data Privacy**: + - Use local models for sensitive data + - Check service privacy policies + - Understand data retention policies + +### Performance + +1. **Model Selection**: + - Quantized models (Q4, Q5) for better speed/memory trade-off + - Smaller models for simple tasks + - Larger models only when needed + +2. **Resource Management**: + - Monitor RAM and GPU usage + - Use appropriate batch sizes + - Consider model caching strategies + +3. **Network**: + - Use local endpoints when possible for lower latency + - For cloud: Choose geographically close servers + +### Reliability + +1. **Fallback Strategy**: + ```bash + # Primary: Local LLM + export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1 + + # Fallback: Use OpenAI if local is unavailable + export OPENAI_API_KEY=your_backup_key + ``` + +2. **Health Checks**: + - Periodically test endpoints + - Monitor server status + - Set up alerts for downtime + +3. **Testing**: + - Test configuration before production + - Validate all required modalities work + - Check error handling + +## Getting Help + +**Community Resources:** +- [Open Notebook Discord](https://discord.gg/37XJPXfz2w) - Get help with Open Notebook integration +- [LM Studio Discord](https://discord.gg/lmstudio) - LM Studio-specific support +- [Text Generation WebUI GitHub](https://github.com/oobabooga/text-generation-webui) - Issues and discussions + +**Debugging Steps:** +1. **Test endpoint directly** with curl before configuring Open Notebook +2. **Check Open Notebook logs** for detailed error messages +3. **Verify environment variables** are set correctly +4. 
**Test with simple requests** first (list models, simple completion) + +**Common curl tests:** +```bash +# List models +curl http://localhost:1234/v1/models + +# Test completion +curl http://localhost:1234/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "your-model", + "messages": [{"role": "user", "content": "Hello!"}] + }' + +# Test embeddings +curl http://localhost:8080/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{ + "model": "embedding-model", + "input": "Test text" + }' +``` + +This guide should help you successfully configure OpenAI-compatible providers with Open Notebook. For general AI model configuration, see the [AI Models Guide](ai-models.md). \ No newline at end of file diff --git a/tests/test_models_api.py b/tests/test_models_api.py new file mode 100644 index 0000000..91ac3e7 --- /dev/null +++ b/tests/test_models_api.py @@ -0,0 +1,279 @@ +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from api.main import app + +client = TestClient(app) + + +class TestModelsProviderAvailability: + """Test suite for Models Provider Availability endpoint.""" + + @patch("api.routers.models.os.environ.get") + @patch("api.routers.models.AIFactory.get_available_providers") + def test_generic_env_var_enables_all_modes(self, mock_esperanto, mock_env): + """Test that OPENAI_COMPATIBLE_BASE_URL enables all 4 modes.""" + + # Mock environment: only generic var is set + def env_side_effect(key): + if key == "OPENAI_COMPATIBLE_BASE_URL": + return "http://localhost:1234/v1" + return None + + mock_env.side_effect = env_side_effect + + # Mock Esperanto response + mock_esperanto.return_value = { + "language": ["openai-compatible"], + "embedding": ["openai-compatible"], + "speech_to_text": ["openai-compatible"], + "text_to_speech": ["openai-compatible"], + } + + response = client.get("/api/models/providers") + + assert response.status_code == 200 + data = response.json() + + # 
openai-compatible should be available + assert "openai-compatible" in data["available"] + + # Should support all 4 types + assert "openai-compatible" in data["supported_types"] + supported = data["supported_types"]["openai-compatible"] + assert "language" in supported + assert "embedding" in supported + assert "speech_to_text" in supported + assert "text_to_speech" in supported + assert len(supported) == 4 + + @patch("api.routers.models.os.environ.get") + @patch("api.routers.models.AIFactory.get_available_providers") + def test_mode_specific_env_vars_llm_embedding(self, mock_esperanto, mock_env): + """Test mode-specific env vars (LLM + EMBEDDING) enable only those 2 modes.""" + + # Mock environment: only LLM and EMBEDDING specific vars are set + def env_side_effect(key): + if key == "OPENAI_COMPATIBLE_BASE_URL_LLM": + return "http://localhost:1234/v1" + if key == "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING": + return "http://localhost:8080/v1" + return None + + mock_env.side_effect = env_side_effect + + # Mock Esperanto response + mock_esperanto.return_value = { + "language": ["openai-compatible"], + "embedding": ["openai-compatible"], + "speech_to_text": ["openai-compatible"], + "text_to_speech": ["openai-compatible"], + } + + response = client.get("/api/models/providers") + + assert response.status_code == 200 + data = response.json() + + # openai-compatible should be available + assert "openai-compatible" in data["available"] + + # Should support only language and embedding + assert "openai-compatible" in data["supported_types"] + supported = data["supported_types"]["openai-compatible"] + assert "language" in supported + assert "embedding" in supported + assert "speech_to_text" not in supported + assert "text_to_speech" not in supported + assert len(supported) == 2 + + @patch("api.routers.models.os.environ.get") + @patch("api.routers.models.AIFactory.get_available_providers") + def test_no_env_vars_set(self, mock_esperanto, mock_env): + """Test that openai-compatible 
is not available when no env vars are set."""
+
+        # Mock environment: no openai-compatible vars are set; every lookup yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed for language and embedding only
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should NOT be available
+        assert "openai-compatible" not in data["available"]
+        assert "openai-compatible" in data["unavailable"]
+
+        # Should not have supported_types entry
+        assert "openai-compatible" not in data["supported_types"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_mixed_config_generic_and_mode_specific(self, mock_esperanto, mock_env):
+        """Test mixed config: generic + mode-specific (generic should enable all)."""
+
+        # Mock environment: both generic and mode-specific vars are set; any other key yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            if key == "OPENAI_COMPATIBLE_BASE_URL":
+                return "http://localhost:1234/v1"
+            if key == "OPENAI_COMPATIBLE_BASE_URL_LLM":
+                return "http://localhost:5678/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed under all four modalities
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # openai-compatible should be available
+        assert "openai-compatible" in data["available"]
+
+        # Generic var enables all, so all 4 should be supported
+        assert "openai-compatible" in data["supported_types"]
+        supported = data["supported_types"]["openai-compatible"]
+        assert "language" in supported
+        assert "embedding" in supported
+        assert "speech_to_text"
in supported
+        assert "text_to_speech" in supported
+        assert len(supported) == 4
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_llm_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (LLM only)."""
+
+        # Mock environment: only LLM specific var is set; any other key yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            if key == "OPENAI_COMPATIBLE_BASE_URL_LLM":
+                return "http://localhost:1234/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed under all four modalities
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only language
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["language"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_embedding_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (EMBEDDING only)."""
+
+        # Mock environment: only EMBEDDING specific var is set; any other key yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            if key == "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING":
+                return "http://localhost:8080/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed under all four modalities
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only embedding
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["embedding"]
+
@patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_stt_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (STT only)."""
+
+        # Mock environment: only STT specific var is set; any other key yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            if key == "OPENAI_COMPATIBLE_BASE_URL_STT":
+                return "http://localhost:9000/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed under all four modalities
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only speech_to_text
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["speech_to_text"]
+
+    @patch("api.routers.models.os.environ.get")
+    @patch("api.routers.models.AIFactory.get_available_providers")
+    def test_individual_mode_tts_only(self, mock_esperanto, mock_env):
+        """Test individual mode-specific var (TTS only)."""
+
+        # Mock environment: only TTS specific var is set; any other key yields the caller's default
+        def env_side_effect(key, default=None):  # same call shape as os.environ.get, so two-argument lookups keep working
+            if key == "OPENAI_COMPATIBLE_BASE_URL_TTS":
+                return "http://localhost:9000/v1"
+            return default
+
+        mock_env.side_effect = env_side_effect
+
+        # Mock Esperanto response: openai-compatible is listed under all four modalities
+        mock_esperanto.return_value = {
+            "language": ["openai-compatible"],
+            "embedding": ["openai-compatible"],
+            "speech_to_text": ["openai-compatible"],
+            "text_to_speech": ["openai-compatible"],
+        }
+
+        response = client.get("/api/models/providers")
+
+        assert response.status_code == 200
+        data = response.json()
+
+        # Should support only text_to_speech
+        supported = data["supported_types"]["openai-compatible"]
+        assert supported == ["text_to_speech"]
diff --git a/uv.lock b/uv.lock
index e5a0971..40956ff 100644
--- a/uv.lock
+++ b/uv.lock
@@ -620,15 +620,15 @@ wheels = [
[[package]] name = "esperanto" -version = "2.6.0" +version = "2.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/a6/088c04b8df5c89d09620869647271ef480a855734d7b17f78fcdb7f183d2/esperanto-2.6.0.tar.gz", hash = "sha256:49ae83650812ddf32e8a5b54229b5bb8393b8a0b866c77ae7e264e2adc9231a7", size = 535743, upload-time = "2025-09-26T21:51:52.844Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/cf/0da02a603a63b3850abd14d23629f101942db5c18840b0cc6f34d7db9a04/esperanto-2.7.0.tar.gz", hash = "sha256:3861e4e20697813b19f0070a1142934bd6792077c3c174a2c3dd4b6ca0676b06", size = 553433, upload-time = "2025-10-19T02:04:30.21Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/d6/32b84cbeac1234f2e77c920b6d07e574ee6bfa3675797bb8bd76f36e7e0f/esperanto-2.6.0-py3-none-any.whl", hash = "sha256:063108274966e8e9bc19b844740ddf7646dd4bc5f6b1b7c586cac37947ffeab0", size = 129234, upload-time = "2025-09-26T21:51:51.159Z" }, + { url = "https://files.pythonhosted.org/packages/14/9c/79827f246965ed66ae8d2f3e3937e552730eaf48b270dac852a4756c7bf4/esperanto-2.7.0-py3-none-any.whl", hash = "sha256:2ea3fa98d8622d08a18dc6701ad362461de02492a3252326c70c969b3aba3db6", size = 129524, upload-time = "2025-10-19T02:04:28.57Z" }, ] [[package]]