Merge branch 'main' of github.com:lfnovo/open-notebook
This commit is contained in:
commit
2fa2956c4c
9 changed files with 943 additions and 28 deletions
|
|
@ -14,6 +14,8 @@ surreal-data/
|
|||
notebook_data/
|
||||
temp/
|
||||
*.env
|
||||
.git/
|
||||
.github/
|
||||
|
||||
# Frontend build artifacts and dependencies
|
||||
frontend/node_modules/
|
||||
|
|
@ -55,3 +57,7 @@ coverage.xml
|
|||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
|
||||
.quarentena/
|
||||
surreal_single_data/
|
||||
15
.env.example
15
.env.example
|
|
@ -19,6 +19,7 @@ API_URL=http://localhost:5055
|
|||
# GEMINI
|
||||
# this is the best model for long context and podcast generation
|
||||
# GOOGLE_API_KEY=
|
||||
# GEMINI_API_BASE_URL= # Optional: Override default endpoint (for Vertex AI, proxies, etc.)
|
||||
|
||||
# VERTEXAI
|
||||
# VERTEX_PROJECT=my-google-cloud-project-name
|
||||
|
|
@ -57,10 +58,22 @@ API_URL=http://localhost:5055
|
|||
# VOYAGE AI
|
||||
# VOYAGE_API_KEY=
|
||||
|
||||
# OPEN AI COMPATIBLE ENDPOINTS
|
||||
# OPENAI COMPATIBLE ENDPOINTS
|
||||
# Generic configuration (applies to all modalities: language, embedding, STT, TTS)
|
||||
# OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
# OPENAI_COMPATIBLE_API_KEY=
|
||||
|
||||
# Mode-specific configuration (overrides generic if set)
|
||||
# Use these when you want different endpoints for different capabilities
|
||||
# OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
# OPENAI_COMPATIBLE_API_KEY_LLM=
|
||||
# OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
|
||||
# OPENAI_COMPATIBLE_API_KEY_EMBEDDING=
|
||||
# OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
|
||||
# OPENAI_COMPATIBLE_API_KEY_STT=
|
||||
# OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
|
||||
# OPENAI_COMPATIBLE_API_KEY_TTS=
|
||||
|
||||
# AZURE OPENAI
|
||||
# AZURE_OPENAI_API_KEY=
|
||||
# AZURE_OPENAI_ENDPOINT=
|
||||
|
|
|
|||
20
.github/workflows/build-and-release.yml
vendored
20
.github/workflows/build-and-release.yml
vendored
|
|
@ -3,19 +3,10 @@ name: Build and Release
|
|||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type to create'
|
||||
required: true
|
||||
default: 'both'
|
||||
type: choice
|
||||
options:
|
||||
- both
|
||||
- regular
|
||||
- single
|
||||
push_latest:
|
||||
description: 'Also push latest tags'
|
||||
required: false
|
||||
default: true
|
||||
description: 'Also push v1-latest tags'
|
||||
required: true
|
||||
default: false
|
||||
type: boolean
|
||||
release:
|
||||
types: [published]
|
||||
|
|
@ -59,7 +50,6 @@ jobs:
|
|||
build-regular:
|
||||
needs: extract-version
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.inputs.build_type == 'regular' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -145,7 +135,6 @@ jobs:
|
|||
build-single:
|
||||
needs: extract-version
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.inputs.build_type == 'single' || github.event.inputs.build_type == 'both' || github.event_name == 'release'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -237,8 +226,7 @@ jobs:
|
|||
run: |
|
||||
echo "## Build Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Version:** ${{ needs.extract-version.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Build Type:** ${{ github.event.inputs.build_type || 'both' }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Push Latest:** ${{ github.event.inputs.push_latest || 'true' }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Push v1-Latest:** ${{ github.event.inputs.push_latest || 'false' }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Registries:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "✅ **GHCR:** \`${{ env.GHCR_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY
|
||||
|
|
|
|||
4
Makefile
4
Makefile
|
|
@ -51,6 +51,7 @@ docker-push: docker-buildx-prepare
|
|||
@echo "🔨 Building regular image..."
|
||||
docker buildx build --pull \
|
||||
--platform $(PLATFORMS) \
|
||||
--progress=plain \
|
||||
-t $(DOCKERHUB_IMAGE):$(VERSION) \
|
||||
-t $(GHCR_IMAGE):$(VERSION) \
|
||||
--push \
|
||||
|
|
@ -58,6 +59,7 @@ docker-push: docker-buildx-prepare
|
|||
@echo "🔨 Building single-container image..."
|
||||
docker buildx build --pull \
|
||||
--platform $(PLATFORMS) \
|
||||
--progress=plain \
|
||||
-f Dockerfile.single \
|
||||
-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
|
||||
-t $(GHCR_IMAGE):$(VERSION)-single \
|
||||
|
|
@ -77,6 +79,7 @@ docker-push-latest: docker-buildx-prepare
|
|||
@echo "🔨 Building regular image with latest tag..."
|
||||
docker buildx build --pull \
|
||||
--platform $(PLATFORMS) \
|
||||
--progress=plain \
|
||||
-t $(DOCKERHUB_IMAGE):$(VERSION) \
|
||||
-t $(DOCKERHUB_IMAGE):v1-latest \
|
||||
-t $(GHCR_IMAGE):$(VERSION) \
|
||||
|
|
@ -86,6 +89,7 @@ docker-push-latest: docker-buildx-prepare
|
|||
@echo "🔨 Building single-container image with latest tag..."
|
||||
docker buildx build --pull \
|
||||
--platform $(PLATFORMS) \
|
||||
--progress=plain \
|
||||
-f Dockerfile.single \
|
||||
-t $(DOCKERHUB_IMAGE):$(VERSION)-single \
|
||||
-t $(DOCKERHUB_IMAGE):v1-latest-single \
|
||||
|
|
|
|||
|
|
@ -17,6 +17,21 @@ from open_notebook.exceptions import InvalidInputError
|
|||
router = APIRouter()
|
||||
|
||||
|
||||
def _check_openai_compatible_support(mode: str) -> bool:
|
||||
"""
|
||||
Check if OpenAI-compatible provider is available for a specific mode.
|
||||
|
||||
Args:
|
||||
mode: One of 'LLM', 'EMBEDDING', 'STT', 'TTS'
|
||||
|
||||
Returns:
|
||||
bool: True if either generic or mode-specific env var is set
|
||||
"""
|
||||
generic = os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None
|
||||
specific = os.environ.get(f"OPENAI_COMPATIBLE_BASE_URL_{mode}") is not None
|
||||
return generic or specific
|
||||
|
||||
|
||||
@router.get("/models", response_model=List[ModelResponse])
|
||||
async def get_models(
|
||||
type: Optional[str] = Query(None, description="Filter by model type")
|
||||
|
|
@ -191,22 +206,43 @@ async def get_provider_availability():
|
|||
),
|
||||
"mistral": os.environ.get("MISTRAL_API_KEY") is not None,
|
||||
"deepseek": os.environ.get("DEEPSEEK_API_KEY") is not None,
|
||||
"openai-compatible": os.environ.get("OPENAI_COMPATIBLE_BASE_URL") is not None,
|
||||
"openai-compatible": (
|
||||
_check_openai_compatible_support("LLM")
|
||||
or _check_openai_compatible_support("EMBEDDING")
|
||||
or _check_openai_compatible_support("STT")
|
||||
or _check_openai_compatible_support("TTS")
|
||||
),
|
||||
}
|
||||
|
||||
available_providers = [k for k, v in provider_status.items() if v]
|
||||
unavailable_providers = [k for k, v in provider_status.items() if not v]
|
||||
|
||||
|
||||
# Get supported model types from Esperanto
|
||||
esperanto_available = AIFactory.get_available_providers()
|
||||
|
||||
|
||||
# Build supported types mapping only for available providers
|
||||
supported_types: dict[str, list[str]] = {}
|
||||
for provider in available_providers:
|
||||
supported_types[provider] = []
|
||||
for model_type, providers in esperanto_available.items():
|
||||
if provider in providers:
|
||||
supported_types[provider].append(model_type)
|
||||
|
||||
# Special handling for openai-compatible to check mode-specific availability
|
||||
if provider == "openai-compatible":
|
||||
# Map Esperanto model types to our environment variable modes
|
||||
mode_mapping = {
|
||||
"language": "LLM",
|
||||
"embedding": "EMBEDDING",
|
||||
"speech_to_text": "STT",
|
||||
"text_to_speech": "TTS",
|
||||
}
|
||||
for model_type, mode in mode_mapping.items():
|
||||
if model_type in esperanto_available and provider in esperanto_available[model_type]:
|
||||
if _check_openai_compatible_support(mode):
|
||||
supported_types[provider].append(model_type)
|
||||
else:
|
||||
# Standard provider detection
|
||||
for model_type, providers in esperanto_available.items():
|
||||
if provider in providers:
|
||||
supported_types[provider].append(model_type)
|
||||
|
||||
return ProviderAvailabilityResponse(
|
||||
available=available_providers,
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
|
|||
| **Azure OpenAI** | ✅ | ✅ | ❌ | ❌ |
|
||||
| **OpenRouter** | ✅ | ❌ | ❌ | ❌ |
|
||||
| **Perplexity** | ✅ | ❌ | ❌ | ❌ |
|
||||
| **OpenAI Compatible** | ✅ | ❌ | ❌ | ❌ |
|
||||
| **OpenAI Compatible** | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
## Model Selection Guide
|
||||
|
||||
|
|
@ -103,6 +103,10 @@ Open Notebook uses four distinct types of AI models, each optimized for specific
|
|||
**Environment Setup**
|
||||
```bash
|
||||
export GEMINI_API_KEY=your_api_key_here
|
||||
|
||||
# Optional: Override the default Gemini API endpoint
|
||||
# Use this for Vertex AI, custom proxies, or alternative endpoints
|
||||
# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
|
||||
```
|
||||
|
||||
**Recommended Models**
|
||||
|
|
@ -321,22 +325,32 @@ export VOYAGE_API_KEY=your_api_key_here
|
|||
---
|
||||
|
||||
### 🔧 OpenAI Compatible (LM Studio & Others)
|
||||
**Best for**: Using any OpenAI-compatible API endpoint, including LM Studio
|
||||
**Best for**: Using any OpenAI-compatible API endpoint for all AI modalities, including LM Studio
|
||||
|
||||
**Environment Setup**
|
||||
```bash
|
||||
# Generic configuration (applies to all modalities)
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
# Optional - only if your endpoint requires authentication
|
||||
export OPENAI_COMPATIBLE_API_KEY=your_key_here
|
||||
|
||||
# Mode-specific configuration (for different endpoints per modality)
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
|
||||
export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
|
||||
export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
|
||||
```
|
||||
|
||||
**Common Use Cases**
|
||||
- **LM Studio**: Run models locally with a familiar UI
|
||||
- **Text Generation WebUI**: Alternative local inference
|
||||
- **vLLM**: High-performance inference server
|
||||
- **Custom Endpoints**: Any OpenAI-compatible API
|
||||
|
||||
**Strengths**
|
||||
- Use any OpenAI-compatible endpoint
|
||||
- **NEW**: Full support for all 4 modalities (language, embeddings, STT, TTS)
|
||||
- Configure different endpoints for different capabilities
|
||||
- Perfect for LM Studio users
|
||||
- Flexibility in model deployment
|
||||
- Works with local and remote endpoints
|
||||
|
|
@ -346,6 +360,8 @@ export OPENAI_COMPATIBLE_API_KEY=your_key_here
|
|||
- Model availability varies by endpoint
|
||||
- Some endpoints may not support all features
|
||||
|
||||
> **📖 Need detailed setup help?** Check our comprehensive [OpenAI-Compatible Setup Guide](openai-compatible.md) for LM Studio, Text Generation WebUI, vLLM, and other configurations.
|
||||
|
||||
## 🧠 Reasoning Models
|
||||
|
||||
Open Notebook fully supports **reasoning models** that expose their thinking process. These models emit their internal reasoning inside `<think>` tags, which Open Notebook handles automatically.
|
||||
|
|
@ -490,6 +506,7 @@ Set up your API keys using environment variables. Here's the complete list:
|
|||
export OPENAI_API_KEY=your_key
|
||||
export ANTHROPIC_API_KEY=your_key
|
||||
export GEMINI_API_KEY=your_key
|
||||
export GEMINI_API_BASE_URL=https://custom-endpoint.com # Optional
|
||||
|
||||
# Additional Language Providers
|
||||
export MISTRAL_API_KEY=your_key
|
||||
|
|
@ -569,10 +586,14 @@ export ANTHROPIC_API_KEY=sk-ant-your-key-here
|
|||
#### Google (Gemini)
|
||||
```bash
|
||||
export GEMINI_API_KEY=your-key-here
|
||||
|
||||
# Optional: Custom API endpoint (for Vertex AI, proxies, etc.)
|
||||
# export GEMINI_API_BASE_URL=https://your-custom-endpoint.com
|
||||
```
|
||||
- Get your API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
|
||||
- Excellent for large context and TTS
|
||||
- Cost-effective option
|
||||
- Supports custom endpoints via `GEMINI_API_BASE_URL` for advanced deployments
|
||||
|
||||
#### Ollama (Local)
|
||||
```bash
|
||||
|
|
|
|||
568
docs/features/openai-compatible.md
Normal file
568
docs/features/openai-compatible.md
Normal file
|
|
@ -0,0 +1,568 @@
|
|||
# OpenAI-Compatible Providers Setup Guide
|
||||
|
||||
Open Notebook supports OpenAI-compatible API endpoints across all AI modalities (language models, embeddings, speech-to-text, and text-to-speech), giving you the flexibility to use popular tools like LM Studio, Text Generation WebUI, vLLM, and custom inference servers.
|
||||
|
||||
## Why Choose OpenAI-Compatible Providers?
|
||||
|
||||
- **🆓 Cost Flexibility**: Use free local inference or choose cost-effective cloud providers
|
||||
- **🔒 Privacy Control**: Run models locally or choose privacy-focused hosted services
|
||||
- **🎯 Model Selection**: Access to thousands of open-source models
|
||||
- **⚡ Performance Tuning**: Optimize inference for your specific hardware
|
||||
- **🔧 Full Control**: Deploy on your infrastructure with your configurations
|
||||
- **🌐 Universal Standard**: Works with any service that implements the OpenAI API specification
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Basic Setup (All Modalities)
|
||||
|
||||
**For LM Studio** (simplest):
|
||||
```bash
|
||||
# Start LM Studio and enable server mode on port 1234
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
|
||||
# Most LM Studio endpoints don't require an API key
|
||||
# export OPENAI_COMPATIBLE_API_KEY=not_needed
|
||||
```
|
||||
|
||||
**For Text Generation WebUI**:
|
||||
```bash
|
||||
# Start with --api flag
|
||||
# python server.py --api --listen
|
||||
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
|
||||
```
|
||||
|
||||
**For vLLM**:
|
||||
```bash
|
||||
# Start vLLM server
|
||||
# vllm serve MODEL_NAME --port 8000
|
||||
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
|
||||
```
|
||||
|
||||
### Advanced Setup (Mode-Specific Endpoints)
|
||||
|
||||
Use different endpoints for different capabilities:
|
||||
|
||||
```bash
|
||||
# Language models on LM Studio
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# Embeddings on a dedicated embedding server
|
||||
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
|
||||
|
||||
# Speech services on a different server
|
||||
export OPENAI_COMPATIBLE_BASE_URL_STT=http://localhost:9000/v1
|
||||
export OPENAI_COMPATIBLE_BASE_URL_TTS=http://localhost:9000/v1
|
||||
```
|
||||
|
||||
## Environment Variable Reference
|
||||
|
||||
### Generic Configuration
|
||||
|
||||
Use these when you want the same endpoint for all modalities:
|
||||
|
||||
| Variable | Purpose | Required |
|
||||
|----------|---------|----------|
|
||||
| `OPENAI_COMPATIBLE_BASE_URL` | Base URL for all AI services | Yes (unless using mode-specific) |
|
||||
| `OPENAI_COMPATIBLE_API_KEY` | API key if endpoint requires auth | Optional |
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY=your_key_here # If needed
|
||||
```
|
||||
|
||||
### Mode-Specific Configuration
|
||||
|
||||
Use these when you want different endpoints for different capabilities:
|
||||
|
||||
| Variable | Purpose | Modality |
|
||||
|----------|---------|----------|
|
||||
| `OPENAI_COMPATIBLE_BASE_URL_LLM` | Language model endpoint | Language models |
|
||||
| `OPENAI_COMPATIBLE_API_KEY_LLM` | API key for LLM endpoint | Language models |
|
||||
| `OPENAI_COMPATIBLE_BASE_URL_EMBEDDING` | Embedding model endpoint | Embeddings |
|
||||
| `OPENAI_COMPATIBLE_API_KEY_EMBEDDING` | API key for embedding endpoint | Embeddings |
|
||||
| `OPENAI_COMPATIBLE_BASE_URL_STT` | Speech-to-text endpoint | Speech-to-Text |
|
||||
| `OPENAI_COMPATIBLE_API_KEY_STT` | API key for STT endpoint | Speech-to-Text |
|
||||
| `OPENAI_COMPATIBLE_BASE_URL_TTS` | Text-to-speech endpoint | Text-to-Speech |
|
||||
| `OPENAI_COMPATIBLE_API_KEY_TTS` | API key for TTS endpoint | Text-to-Speech |
|
||||
|
||||
**Precedence**: When both are set, a mode-specific variable overrides the generic `OPENAI_COMPATIBLE_BASE_URL` for that modality.
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# LLM on LM Studio
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# Embeddings on dedicated server
|
||||
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=secret_key_here
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### LM Studio
|
||||
|
||||
**What is LM Studio?**
|
||||
LM Studio is a desktop application for running large language models locally with a user-friendly interface.
|
||||
|
||||
**Setup Steps:**
|
||||
1. **Download and install** LM Studio from [lmstudio.ai](https://lmstudio.ai/)
|
||||
2. **Download a model** (e.g., Llama 3, Qwen, Mistral)
|
||||
3. **Start the local server**:
|
||||
- Go to the "Local Server" tab
|
||||
- Click "Start Server"
|
||||
- Note the port (default: 1234)
|
||||
|
||||
4. **Configure Open Notebook**:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
```
|
||||
|
||||
**What works:**
|
||||
- ✅ Language models (chat, completions)
|
||||
- ✅ Embeddings (with embedding models)
|
||||
- ❌ Speech-to-text (not supported)
|
||||
- ❌ Text-to-speech (not supported)
|
||||
|
||||
**Tips:**
|
||||
- LM Studio doesn't require an API key
|
||||
- Choose quantized models (Q4, Q5) for better performance
|
||||
- Monitor RAM usage - larger models need more memory
|
||||
|
||||
---
|
||||
|
||||
### Text Generation WebUI (Oobabooga)
|
||||
|
||||
**What is Text Generation WebUI?**
|
||||
A powerful Gradio-based web interface for running Large Language Models.
|
||||
|
||||
**Setup Steps:**
|
||||
1. **Install** following [official instructions](https://github.com/oobabooga/text-generation-webui)
|
||||
2. **Download a model** using the UI or manually
|
||||
3. **Start with API mode**:
|
||||
```bash
|
||||
python server.py --api --listen
|
||||
```
|
||||
|
||||
4. **Configure Open Notebook**:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:5000/v1
|
||||
```
|
||||
|
||||
**What works:**
|
||||
- ✅ Language models (excellent support)
|
||||
- ✅ Embeddings (with compatible models)
|
||||
- ❌ Speech services (not supported)
|
||||
|
||||
**Tips:**
|
||||
- Use `--listen` to accept connections from Docker
|
||||
- Supports more model formats than LM Studio
|
||||
- Great for fine-tuned models
|
||||
|
||||
---
|
||||
|
||||
### vLLM
|
||||
|
||||
**What is vLLM?**
|
||||
High-performance inference server optimized for serving large language models at scale.
|
||||
|
||||
**Setup Steps:**
|
||||
1. **Install vLLM**:
|
||||
```bash
|
||||
pip install vllm
|
||||
```
|
||||
|
||||
2. **Start the server**:
|
||||
```bash
|
||||
vllm serve meta-llama/Llama-3-8B-Instruct --port 8000
|
||||
```
|
||||
|
||||
3. **Configure Open Notebook**:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8000/v1
|
||||
```
|
||||
|
||||
**What works:**
|
||||
- ✅ Language models (optimized inference)
|
||||
- ✅ Embeddings (with embedding models)
|
||||
- ❌ Speech services (not supported)
|
||||
|
||||
**Tips:**
|
||||
- Best performance for production deployments
|
||||
- Supports tensor parallelism for large models
|
||||
- Excellent for high-throughput scenarios
|
||||
|
||||
---
|
||||
|
||||
### Custom OpenAI-Compatible Services
|
||||
|
||||
Many services implement the OpenAI API specification:
|
||||
|
||||
**Examples:**
|
||||
- **Together AI**: Cloud-hosted models
|
||||
- **Anyscale Endpoints**: Ray-based inference
|
||||
- **Replicate**: Cloud model hosting
|
||||
- **LocalAI**: Self-hosted alternative to OpenAI
|
||||
- **FastChat**: Multi-model serving
|
||||
|
||||
**Configuration:**
|
||||
```bash
|
||||
# Generic setup
|
||||
export OPENAI_COMPATIBLE_BASE_URL=https://api.your-service.com/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
## Configuration Scenarios
|
||||
|
||||
### Scenario 1: Single Local Endpoint (Simplest)
|
||||
|
||||
**Use Case**: Running a single local server such as LM Studio and pointing every modality at it
|
||||
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1
|
||||
```
|
||||
|
||||
**Result**:
|
||||
- ✅ Language models available
|
||||
- ✅ Embeddings available (if the endpoint serves an embedding model)
|
||||
- ✅ Speech services available (if the endpoint supports them)
|
||||
- All use the same endpoint
|
||||
|
||||
---
|
||||
|
||||
### Scenario 2: Separate Endpoints per Modality
|
||||
|
||||
**Use Case**: Language models on LM Studio, embeddings on dedicated server
|
||||
|
||||
```bash
|
||||
# Language models on LM Studio
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# Embeddings on specialized server
|
||||
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=http://localhost:8080/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key_here
|
||||
```
|
||||
|
||||
**Result**:
|
||||
- ✅ Language models use LM Studio (port 1234)
|
||||
- ✅ Embeddings use specialized server (port 8080)
|
||||
- ❌ Speech services not available (not configured)
|
||||
|
||||
---
|
||||
|
||||
### Scenario 3: Mixed Local and Cloud
|
||||
|
||||
**Use Case**: Local models for privacy, cloud for specialized tasks
|
||||
|
||||
```bash
|
||||
# Local LLM (privacy-sensitive work)
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# Cloud embeddings (better quality)
|
||||
export OPENAI_COMPATIBLE_BASE_URL_EMBEDDING=https://api.cloud-provider.com/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=cloud_key_here
|
||||
|
||||
# Cloud speech services
|
||||
export OPENAI_COMPATIBLE_BASE_URL_TTS=https://api.cloud-provider.com/v1
|
||||
export OPENAI_COMPATIBLE_API_KEY_TTS=cloud_key_here
|
||||
```
|
||||
|
||||
**Result**:
|
||||
- ✅ Sensitive chat stays local
|
||||
- ✅ High-quality embeddings from cloud
|
||||
- ✅ Professional TTS from cloud
|
||||
- 🔒 Privacy for conversations, cloud for non-sensitive features
|
||||
|
||||
---
|
||||
|
||||
### Scenario 4: Docker Deployment
|
||||
|
||||
**Use Case**: Open Notebook in Docker, LM Studio on host machine
|
||||
|
||||
**On macOS/Windows**:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
|
||||
```
|
||||
|
||||
**On Linux**:
|
||||
```bash
|
||||
# Use host networking or find host IP
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
|
||||
# or use --network host in docker run
|
||||
```
|
||||
|
||||
**Important**:
|
||||
- LM Studio must be set to listen on `0.0.0.0`, not just `localhost`
|
||||
- In LM Studio settings, enable "Allow network connections"
|
||||
|
||||
## Network Configuration
|
||||
|
||||
### Docker Networking
|
||||
|
||||
**Problem**: Docker containers can't reach `localhost` on the host
|
||||
|
||||
**Solutions:**
|
||||
|
||||
**Option 1: Use `host.docker.internal` (Mac/Windows)**
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
|
||||
```
|
||||
|
||||
**Option 2: Use host IP address (Linux)**
|
||||
```bash
|
||||
# Find host IP
|
||||
ip addr show docker0 | grep inet
|
||||
|
||||
# Use in environment
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://172.17.0.1:1234/v1
|
||||
```
|
||||
|
||||
**Option 3: Host networking (Linux only)**
|
||||
```bash
|
||||
docker run --network host \
|
||||
-v ./notebook_data:/app/data \
|
||||
-e OPENAI_COMPATIBLE_BASE_URL=http://localhost:1234/v1 \
|
||||
lfnovo/open_notebook:v1-latest-single
|
||||
```
|
||||
|
||||
### Remote Servers
|
||||
|
||||
**Use Case**: OpenAI-compatible service on a different machine
|
||||
|
||||
```bash
|
||||
# Replace with your server's IP or hostname
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://192.168.1.100:1234/v1
|
||||
```
|
||||
|
||||
**Security Notes:**
|
||||
- ⚠️ Only use on trusted networks
|
||||
- Consider using HTTPS for production
|
||||
- Implement API key authentication if possible
|
||||
- Use firewall rules to restrict access
|
||||
|
||||
### Port Conflicts
|
||||
|
||||
**Problem**: Default port (1234) is already in use
|
||||
|
||||
**Solution**: Change the port in your inference server
|
||||
|
||||
**LM Studio:**
|
||||
- Settings → Local Server → Port → Change to different port
|
||||
|
||||
**Then update environment:**
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://localhost:8888/v1
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Connection Refused
|
||||
|
||||
**Symptom**: "Connection refused" or "Could not connect to endpoint"
|
||||
|
||||
**Solutions:**
|
||||
1. **Verify server is running**:
|
||||
```bash
|
||||
curl http://localhost:1234/v1/models
|
||||
```
|
||||
|
||||
2. **Check firewall settings**: Ensure the port is not blocked
|
||||
|
||||
3. **For Docker**: Use `host.docker.internal` instead of `localhost` (macOS/Windows); on Linux, use the host IP address or `--network host`
|
||||
|
||||
4. **Check server binding**: Server must listen on `0.0.0.0`, not just `127.0.0.1`
|
||||
|
||||
---
|
||||
|
||||
### Models Not Found
|
||||
|
||||
**Symptom**: "Model not found" or "No models available"
|
||||
|
||||
**Solutions:**
|
||||
1. **Verify model is loaded** in your inference server
|
||||
2. **Check model name** matches what Open Notebook expects
|
||||
3. **For LM Studio**: Ensure model is loaded in the local server tab
|
||||
4. **Test endpoint**:
|
||||
```bash
|
||||
curl http://localhost:1234/v1/models
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Slow Performance
|
||||
|
||||
**Symptom**: Responses take a long time
|
||||
|
||||
**Solutions:**
|
||||
1. **Use quantized models** (Q4, Q5 instead of full precision)
|
||||
2. **Check RAM usage**: Model might be swapping to disk
|
||||
3. **Reduce context length**: Smaller context = faster inference
|
||||
4. **Enable GPU acceleration**: If available
|
||||
5. **For vLLM**: Enable tensor parallelism for large models
|
||||
|
||||
---
|
||||
|
||||
### Authentication Errors
|
||||
|
||||
**Symptom**: "Unauthorized" or "Invalid API key"
|
||||
|
||||
**Solutions:**
|
||||
1. **Set API key** if your endpoint requires it:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_API_KEY=your_key_here
|
||||
```
|
||||
|
||||
2. **Check key validity**: Test with curl:
|
||||
```bash
|
||||
curl -H "Authorization: Bearer YOUR_KEY" \
|
||||
http://localhost:1234/v1/models
|
||||
```
|
||||
|
||||
3. **For mode-specific**: Use the correct key variable:
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_API_KEY_LLM=llm_key
|
||||
export OPENAI_COMPATIBLE_API_KEY_EMBEDDING=embedding_key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Docker Can't Reach Host
|
||||
|
||||
**Symptom**: Connection works locally but not from Docker
|
||||
|
||||
**Solutions:**
|
||||
1. **Use `host.docker.internal`** (Mac/Windows):
|
||||
```bash
|
||||
export OPENAI_COMPATIBLE_BASE_URL=http://host.docker.internal:1234/v1
|
||||
```
|
||||
|
||||
2. **On Linux**: Use host IP or `--network host`
|
||||
|
||||
3. **Check server listening**: Must listen on `0.0.0.0:1234`, not `127.0.0.1:1234`
|
||||
|
||||
4. **Test from inside container**:
|
||||
```bash
|
||||
docker exec -it open-notebook curl http://host.docker.internal:1234/v1/models
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Embeddings Not Working
|
||||
|
||||
**Symptom**: Search or embeddings fail
|
||||
|
||||
**Solutions:**
|
||||
1. **Verify embedding model is loaded**: Many inference servers need explicit embedding model setup
|
||||
2. **Use dedicated embedding endpoint**: If available
|
||||
3. **Check model compatibility**: Not all models support embeddings
|
||||
4. **For LM Studio**: Load an embedding model separately
|
||||
|
||||
---
|
||||
|
||||
### Mixed Results (Some Modes Work, Others Don't)
|
||||
|
||||
**Symptom**: Language models work, but embeddings or speech don't
|
||||
|
||||
**Solution**: Use mode-specific configuration for the modalities that work, and configure a different provider for the rest:
|
||||
```bash
|
||||
# Keep the modality that works on the OpenAI-compatible endpoint
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# For embeddings, use a different provider
|
||||
export OPENAI_API_KEY=your_openai_key # Fallback to OpenAI for embeddings
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Security
|
||||
|
||||
1. **API Keys**:
|
||||
- Use environment variables, never hardcode
|
||||
- Rotate keys regularly for cloud services
|
||||
- Use different keys for different services
|
||||
|
||||
2. **Network**:
|
||||
- Only expose on trusted networks
|
||||
- Use HTTPS in production
|
||||
- Implement firewall rules
|
||||
|
||||
3. **Data Privacy**:
|
||||
- Use local models for sensitive data
|
||||
- Check service privacy policies
|
||||
- Understand data retention policies
|
||||
|
||||
### Performance
|
||||
|
||||
1. **Model Selection**:
|
||||
- Quantized models (Q4, Q5) for better speed/memory trade-off
|
||||
- Smaller models for simple tasks
|
||||
- Larger models only when needed
|
||||
|
||||
2. **Resource Management**:
|
||||
- Monitor RAM and GPU usage
|
||||
- Use appropriate batch sizes
|
||||
- Consider model caching strategies
|
||||
|
||||
3. **Network**:
|
||||
- Use local endpoints when possible for lower latency
|
||||
- For cloud: Choose geographically close servers
|
||||
|
||||
### Reliability
|
||||
|
||||
1. **Fallback Strategy**:
|
||||
```bash
|
||||
# Primary: Local LLM
|
||||
export OPENAI_COMPATIBLE_BASE_URL_LLM=http://localhost:1234/v1
|
||||
|
||||
# Fallback: Use OpenAI if local is unavailable
|
||||
export OPENAI_API_KEY=your_backup_key
|
||||
```
|
||||
|
||||
2. **Health Checks**:
|
||||
- Periodically test endpoints
|
||||
- Monitor server status
|
||||
- Set up alerts for downtime
|
||||
|
||||
3. **Testing**:
|
||||
- Test configuration before production
|
||||
- Validate all required modalities work
|
||||
- Check error handling
|
||||
|
||||
## Getting Help
|
||||
|
||||
**Community Resources:**
|
||||
- [Open Notebook Discord](https://discord.gg/37XJPXfz2w) - Get help with Open Notebook integration
|
||||
- [LM Studio Discord](https://discord.gg/lmstudio) - LM Studio-specific support
|
||||
- [Text Generation WebUI GitHub](https://github.com/oobabooga/text-generation-webui) - Issues and discussions
|
||||
|
||||
**Debugging Steps:**
|
||||
1. **Test endpoint directly** with curl before configuring Open Notebook
|
||||
2. **Check Open Notebook logs** for detailed error messages
|
||||
3. **Verify environment variables** are set correctly
|
||||
4. **Test with simple requests** first (list models, simple completion)
|
||||
|
||||
**Common curl tests:**
|
||||
```bash
|
||||
# List models
|
||||
curl http://localhost:1234/v1/models
|
||||
|
||||
# Test completion
|
||||
curl http://localhost:1234/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "your-model",
|
||||
"messages": [{"role": "user", "content": "Hello!"}]
|
||||
}'
|
||||
|
||||
# Test embeddings
|
||||
curl http://localhost:8080/v1/embeddings \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "embedding-model",
|
||||
"input": "Test text"
|
||||
}'
|
||||
```
|
||||
|
||||
This guide should help you successfully configure OpenAI-compatible providers with Open Notebook. For general AI model configuration, see the [AI Models Guide](ai-models.md).
|
||||
279
tests/test_models_api.py
Normal file
279
tests/test_models_api.py
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from api.main import app
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
class TestModelsProviderAvailability:
    """Test suite for Models Provider Availability endpoint.

    Every test patches ``os.environ.get`` (reached through
    ``api.routers.models``) and ``AIFactory.get_available_providers``, then
    requests ``/api/models/providers`` and inspects how the
    ``openai-compatible`` provider is reported in the JSON response.

    ``patch`` decorators inject their mocks bottom-up, so ``mock_esperanto``
    (the ``AIFactory`` patch) precedes ``mock_env`` in each test signature.
    """

    _PROVIDER = "openai-compatible"
    _ALL_MODES = ("language", "embedding", "speech_to_text", "text_to_speech")
    _ENDPOINT = "/api/models/providers"

    @staticmethod
    def _fake_env(values):
        """Return a stand-in for ``os.environ.get`` backed by *values*.

        The stand-in accepts the optional ``default`` argument, mirroring the
        real ``os.environ.get(key, default=None)`` signature. The patch target
        resolves through ``api.routers.models.os`` (presumably the standard
        ``os`` module), so a one-argument replacement would raise
        ``TypeError`` for any caller that passes a default while the patch is
        active.
        """

        def lookup(key, default=None):
            return values.get(key, default)

        return lookup

    @classmethod
    def _providers_for(cls, modes):
        """Build a fake provider map listing the provider under each of *modes*."""
        return {mode: [cls._PROVIDER] for mode in modes}

    @classmethod
    def _fetch_providers(cls):
        """GET the providers endpoint, assert HTTP 200 and return the JSON body."""
        response = client.get(cls._ENDPOINT)
        assert response.status_code == 200
        return response.json()

    @classmethod
    def _assert_supported_modes(cls, data, expected):
        """Assert the provider's supported types are exactly the *expected* modes."""
        assert cls._PROVIDER in data["supported_types"]
        supported = data["supported_types"][cls._PROVIDER]
        for mode in cls._ALL_MODES:
            if mode in expected:
                assert mode in supported
            else:
                assert mode not in supported
        assert len(supported) == len(expected)

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_generic_env_var_enables_all_modes(self, mock_esperanto, mock_env):
        """Test that OPENAI_COMPATIBLE_BASE_URL enables all 4 modes."""
        # Mock environment: only generic var is set
        mock_env.side_effect = self._fake_env(
            {"OPENAI_COMPATIBLE_BASE_URL": "http://localhost:1234/v1"}
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # openai-compatible should be available
        assert self._PROVIDER in data["available"]
        # Should support all 4 types
        self._assert_supported_modes(data, self._ALL_MODES)

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_mode_specific_env_vars_llm_embedding(self, mock_esperanto, mock_env):
        """Test mode-specific env vars (LLM + EMBEDDING) enable only those 2 modes."""
        # Mock environment: only LLM and EMBEDDING specific vars are set
        mock_env.side_effect = self._fake_env(
            {
                "OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:1234/v1",
                "OPENAI_COMPATIBLE_BASE_URL_EMBEDDING": "http://localhost:8080/v1",
            }
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # openai-compatible should be available
        assert self._PROVIDER in data["available"]
        # Should support only language and embedding
        self._assert_supported_modes(data, ("language", "embedding"))

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_no_env_vars_set(self, mock_esperanto, mock_env):
        """Test that openai-compatible is not available when no env vars are set."""
        # Mock environment: no openai-compatible vars are set
        mock_env.side_effect = self._fake_env({})
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(("language", "embedding"))

        data = self._fetch_providers()

        # openai-compatible should NOT be available
        assert self._PROVIDER not in data["available"]
        assert self._PROVIDER in data["unavailable"]
        # Should not have supported_types entry
        assert self._PROVIDER not in data["supported_types"]

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_mixed_config_generic_and_mode_specific(self, mock_esperanto, mock_env):
        """Test mixed config: generic + mode-specific (generic should enable all)."""
        # Mock environment: both generic and mode-specific vars are set
        mock_env.side_effect = self._fake_env(
            {
                "OPENAI_COMPATIBLE_BASE_URL": "http://localhost:1234/v1",
                "OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:5678/v1",
            }
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # openai-compatible should be available
        assert self._PROVIDER in data["available"]
        # Generic var enables all, so all 4 should be supported
        self._assert_supported_modes(data, self._ALL_MODES)

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_individual_mode_llm_only(self, mock_esperanto, mock_env):
        """Test individual mode-specific var (LLM only)."""
        # Mock environment: only LLM specific var is set
        mock_env.side_effect = self._fake_env(
            {"OPENAI_COMPATIBLE_BASE_URL_LLM": "http://localhost:1234/v1"}
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # Should support only language
        supported = data["supported_types"][self._PROVIDER]
        assert supported == ["language"]

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_individual_mode_embedding_only(self, mock_esperanto, mock_env):
        """Test individual mode-specific var (EMBEDDING only)."""
        # Mock environment: only EMBEDDING specific var is set
        mock_env.side_effect = self._fake_env(
            {"OPENAI_COMPATIBLE_BASE_URL_EMBEDDING": "http://localhost:8080/v1"}
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # Should support only embedding
        supported = data["supported_types"][self._PROVIDER]
        assert supported == ["embedding"]

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_individual_mode_stt_only(self, mock_esperanto, mock_env):
        """Test individual mode-specific var (STT only)."""
        # Mock environment: only STT specific var is set
        mock_env.side_effect = self._fake_env(
            {"OPENAI_COMPATIBLE_BASE_URL_STT": "http://localhost:9000/v1"}
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # Should support only speech_to_text
        supported = data["supported_types"][self._PROVIDER]
        assert supported == ["speech_to_text"]

    @patch("api.routers.models.os.environ.get")
    @patch("api.routers.models.AIFactory.get_available_providers")
    def test_individual_mode_tts_only(self, mock_esperanto, mock_env):
        """Test individual mode-specific var (TTS only)."""
        # Mock environment: only TTS specific var is set
        mock_env.side_effect = self._fake_env(
            {"OPENAI_COMPATIBLE_BASE_URL_TTS": "http://localhost:9000/v1"}
        )
        # Mock Esperanto response
        mock_esperanto.return_value = self._providers_for(self._ALL_MODES)

        data = self._fetch_providers()

        # Should support only text_to_speech
        supported = data["supported_types"][self._PROVIDER]
        assert supported == ["text_to_speech"]
|
||||
6
uv.lock
6
uv.lock
|
|
@ -620,15 +620,15 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "esperanto"
|
||||
version = "2.6.0"
|
||||
version = "2.7.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ec/a6/088c04b8df5c89d09620869647271ef480a855734d7b17f78fcdb7f183d2/esperanto-2.6.0.tar.gz", hash = "sha256:49ae83650812ddf32e8a5b54229b5bb8393b8a0b866c77ae7e264e2adc9231a7", size = 535743, upload-time = "2025-09-26T21:51:52.844Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6d/cf/0da02a603a63b3850abd14d23629f101942db5c18840b0cc6f34d7db9a04/esperanto-2.7.0.tar.gz", hash = "sha256:3861e4e20697813b19f0070a1142934bd6792077c3c174a2c3dd4b6ca0676b06", size = 553433, upload-time = "2025-10-19T02:04:30.21Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/d6/32b84cbeac1234f2e77c920b6d07e574ee6bfa3675797bb8bd76f36e7e0f/esperanto-2.6.0-py3-none-any.whl", hash = "sha256:063108274966e8e9bc19b844740ddf7646dd4bc5f6b1b7c586cac37947ffeab0", size = 129234, upload-time = "2025-09-26T21:51:51.159Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/9c/79827f246965ed66ae8d2f3e3937e552730eaf48b270dac852a4756c7bf4/esperanto-2.7.0-py3-none-any.whl", hash = "sha256:2ea3fa98d8622d08a18dc6701ad362461de02492a3252326c70c969b3aba3db6", size = 129524, upload-time = "2025-10-19T02:04:28.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
Loading…
Reference in a new issue