diff --git a/Dockerfile b/Dockerfile
index bfcd5a8..e62d5ac 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
@@ -9,13 +9,7 @@ ARG EXTRAS
 ARG HF_PRECACHE_DIR
 ARG HF_TKN_FILE
 
-# Install system dependencies
-#RUN apt-get update && \
-#    apt-get install -y ffmpeg git && \
-#    apt-get clean && \
-#    rm -rf /var/lib/apt/lists/*
-
-# 2) Install system dependencies + Python + pip
+# Install system dependencies + Python + pip
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         python3 \
@@ -26,7 +20,9 @@ RUN apt-get update && \
         python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+# Ubuntu 24.04 marks the apt-provided Python as externally managed (PEP 668), so pip
+# refuses system-wide installs unless --break-system-packages is passed.
+RUN pip install --no-cache-dir --break-system-packages torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu129
 
 COPY . .
 
@@ -35,10 +31,10 @@ COPY . .
 # for more details.
 RUN if [ -n "$EXTRAS" ]; then \
       echo "Installing with extras: [$EXTRAS]"; \
-      pip install --no-cache-dir .[$EXTRAS]; \
+      pip install --no-cache-dir --break-system-packages "whisperlivekit[$EXTRAS]"; \
     else \
       echo "Installing base package only"; \
-      pip install --no-cache-dir .; \
+      pip install --no-cache-dir --break-system-packages whisperlivekit; \
     fi
 
 # Enable in-container caching for Hugging Face models by:
@@ -81,4 +77,4 @@ EXPOSE 8000
 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]
 
 # Default args
-CMD ["--model", "base"]
\ No newline at end of file
+CMD ["--model", "medium"]
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
new file mode 100644
index 0000000..5101fae
--- /dev/null
+++ b/Dockerfile.cpu
@@ -0,0 +1,65 @@
+FROM python:3.13-slim
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+ARG EXTRAS
+ARG HF_PRECACHE_DIR
+ARG HF_TKN_FILE
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg \
+        git \
+        build-essential \
+        python3-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install CPU-only PyTorch (--no-cache-dir keeps the wheel cache out of the layer)
+RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+# The build context is needed for the optional Hugging Face cache/token copies below
+COPY . .
+
+# Install WhisperLiveKit directly, allowing for optional dependencies
+# (the requirement is quoted so the shell never treats the [extras] brackets as a glob)
+RUN if [ -n "$EXTRAS" ]; then \
+      echo "Installing with extras: [$EXTRAS]"; \
+      pip install --no-cache-dir "whisperlivekit[$EXTRAS]"; \
+    else \
+      echo "Installing base package only"; \
+      pip install --no-cache-dir whisperlivekit; \
+    fi
+
+# Conditionally copy a local pre-cache from the build context
+RUN if [ -n "$HF_PRECACHE_DIR" ]; then \
+      echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \
+      mkdir -p /root/.cache/huggingface/hub && \
+      cp -r "$HF_PRECACHE_DIR"/* /root/.cache/huggingface/hub; \
+    else \
+      echo "No local Hugging Face cache specified, skipping copy"; \
+    fi
+
+# Conditionally copy a Hugging Face token if provided
+# NOTE(review): the token is written into an image layer; consider a BuildKit secret mount
+RUN if [ -n "$HF_TKN_FILE" ]; then \
+      echo "Copying Hugging Face token from $HF_TKN_FILE"; \
+      mkdir -p /root/.cache/huggingface && \
+      cp "$HF_TKN_FILE" /root/.cache/huggingface/token; \
+    else \
+      echo "No Hugging Face token file specified, skipping token setup"; \
+    fi
+
+# Enable in-container caching for Hugging Face models.
+# Declared after the pre-cache copy: the legacy builder discards build-time writes made after VOLUME.
+VOLUME ["/root/.cache/huggingface/hub"]
+
+# Expose port for the transcription server
+EXPOSE 8000
+
+ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]
+
+# Default args - you might want to use a smaller model for CPU
+CMD ["--model", "tiny"]
diff --git a/README.md b/README.md
index f62b35b..6d2ae17 100644
--- a/README.md
+++ b/README.md
@@ -216,19 +216,39 @@ To deploy WhisperLiveKit in production:
 
 4. **HTTPS Support**: For secure deployments, use "wss://" instead of "ws://" in WebSocket URL
 
-### 🐋 Docker
+## 🐋 Docker
 
-A Dockerfile is provided which allows re-use of Python package installation options. Create a reusable image with only the basics and then run as a named container:
+Deploy the application easily using Docker with GPU or CPU support.
 
+### Prerequisites
+- Docker installed on your system
+- For GPU support: NVIDIA Container Toolkit (NVIDIA Docker runtime) installed
+
+### Quick Start
+
+**With GPU acceleration (recommended):**
 ```bash
-docker build -t whisperlivekit-defaults .
-docker create --gpus all --name whisperlivekit -p 8000:8000 whisperlivekit-defaults --model base
-docker start -i whisperlivekit
+docker build -t wlk .
+docker run --gpus all -p 8000:8000 --name wlk wlk
 ```
 
-> **Note**: For **large** models, ensure that your **docker runtime** has enough **memory** available
+**CPU only:**
+```bash
+docker build -f Dockerfile.cpu -t wlk .
+docker run -p 8000:8000 --name wlk wlk
+```
+
+### Advanced Usage
+
+**Custom configuration:**
+```bash
+# Example with custom model and language
+docker run --gpus all -p 8000:8000 --name wlk wlk --model large-v3 --language fr
+```
+
+### Memory Requirements
+- **Large models**: Ensure your Docker runtime has sufficient memory allocated
 
-> **Note**: If you're running on a system without NVIDIA GPU support (such as Mac with Apple Silicon or any system without CUDA capabilities), you need to **remove the `--gpus all` flag** from the `docker create` command. Without GPU acceleration, transcription will use CPU only, which may be significantly slower. Consider using small models for better performance on CPU-only systems.
 
 #### Customization
 
diff --git a/whisperlivekit/web/live_transcription.js b/whisperlivekit/web/live_transcription.js
index f2efb18..21ebba7 100644
--- a/whisperlivekit/web/live_transcription.js
+++ b/whisperlivekit/web/live_transcription.js
@@ -400,7 +400,15 @@ async function startRecording() {
     isRecording = true;
     updateUI();
   } catch (err) {
-    statusText.textContent = "Error accessing microphone. Please allow microphone access.";
+    if (window.location.hostname === "0.0.0.0") {
+      // Browsers only expose getUserMedia in secure contexts; plain-HTTP 0.0.0.0 is not
+      // treated as one, while localhost is. Suggest the same port on localhost instead of assuming 8000.
+      const localHost = window.location.host.replace("0.0.0.0", "localhost");
+      statusText.textContent =
+        `Error accessing microphone. Browsers may block microphone access on 0.0.0.0. Try using ${localHost} instead.`;
+    } else {
+      statusText.textContent = "Error accessing microphone. Please allow microphone access.";
+    }
     console.error(err);
   }
 }