🚀 chore (docker): update docker images improving caching and using uv as python package manager

2026-02-25 14:22:43 -03:00 · 2026-02-25 14:22:43 -03:00 · d24805cc18
commit d24805cc18
parent 994ce21365
4 changed files with 178 additions and 113 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,13 @@
 # Keep VCS data, virtualenvs, tool caches, secrets and build output out of the build context.
 # Patterns are matched relative to the context root, so bytecode artifacts use a
 # leading **/ to be excluded at any depth (e.g. inside whisperlivekit/).
 .git
 .github
 .venv
 **/__pycache__
 **/*.pyc
 .pytest_cache
 .mypy_cache
 .ruff_cache
 .cache
 .tmp
 .secrets
 dist
 build
--- a/Dockerfile
+++ b/Dockerfile
@ -1,86 +1,74 @@
-FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
+FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin
 # --- MARK: Builder Stage
 FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 AS builder-gpu
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 WORKDIR /app
-ARG EXTRAS
+RUN apt-get update && \
-ARG HF_PRECACHE_DIR
+  apt-get install -y --no-install-recommends \
-ARG HF_TKN_FILE
+  build-essential \
  python3-dev && \
  rm -rf /var/lib/apt/lists/*
 # Install UV and set up the environment 
 COPY --from=uvbin /uv /uvx /bin/
 ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1
 ENV UV_PYTHON_PREFERENCE=only-managed
 ENV UV_PYTHON_INSTALL_DIR=/python
 RUN uv python install 3.12
 # Install dependencies first to leverage caching: only pyproject.toml and uv.lock are
 # copied before this sync, so source-only changes do not invalidate the layer.
 # EXTRAS is a comma-separated list of extras (build arg, defaults to gpu-cu129).
 ARG EXTRAS=gpu-cu129
 COPY pyproject.toml uv.lock /app/
 # Rebuild the positional parameters as `--extra <name>` pairs, one per EXTRAS entry,
 # then sync from the lockfile (--frozen) without installing the project itself.
 RUN set -eux; \
  set --; \
  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
  set -- "$@" --extra "$extra"; \
  done; \
  uv sync --frozen --no-install-project --no-editable --no-cache "$@"
 # Copy the source code and install the package only
 COPY whisperlivekit /app/whisperlivekit
 # Same extras as above; this sync omits --no-install-project, so the project is
 # installed as well (non-editable, per --no-editable).
 RUN set -eux; \
  set --; \
  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
  set -- "$@" --extra "$extra"; \
  done; \
  uv sync --frozen --no-editable --no-cache "$@"
 # --- MARK: Runtime Stage 
 FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
 ENV DEBIAN_FRONTEND=noninteractive
 WORKDIR /app
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
+  apt-get install -y --no-install-recommends \
-        python3 \
+  ffmpeg &&\
-        python3-pip \
+  rm -rf /var/lib/apt/lists/*
        python3-venv \
        ffmpeg \
        git \
        build-essential \
        python3-dev \
        ca-certificates && \
    rm -rf /var/lib/apt/lists/*
-RUN python3 -m venv /opt/venv
+# Copy UV binaries
-ENV PATH="/opt/venv/bin:$PATH"
+COPY --from=uvbin /uv /uvx /bin/
-# timeout/retries for large torch wheels
+# Copy the Python version
-RUN pip3 install --upgrade pip setuptools wheel && \
+# No stage in this file creates a `python` user or group, so a named --chown cannot be
+# resolved in the runtime image; copy the uv-managed interpreter with default ownership.
+COPY --from=builder-gpu /python /python
    pip3 --disable-pip-version-check install --timeout=120 --retries=5 \
        --index-url https://download.pytorch.org/whl/cu129 \
        torch torchaudio \
    || (echo "Initial install failed — retrying with extended timeout..." && \
        pip3 --disable-pip-version-check install --timeout=300 --retries=3 \
            --index-url https://download.pytorch.org/whl/cu129 \
            torch torchvision torchaudio)
-COPY . .
+# Copy the virtual environment with all dependencies installed
-
+COPY --from=builder-gpu /app/.venv /app/.venv
 # Install WhisperLiveKit directly, allowing for optional dependencies
 # Example: --build-arg EXTRAS="translation"
 RUN if [ -n "$EXTRAS" ]; then \
      echo "Installing with extras: [$EXTRAS]"; \
      pip install --no-cache-dir "whisperlivekit[$EXTRAS]"; \
    else \
      echo "Installing base package only"; \
      pip install --no-cache-dir whisperlivekit; \
    fi
 # In-container caching for Hugging Face models by: 
 # A) Make the cache directory persistent via an anonymous volume.
 #    Note: This only persists for a single, named container. This is 
 #          only for convenience at the dev/test stage.
 #          For prod, it is better to use a named volume via host mount/k8s.
 VOLUME ["/root/.cache/huggingface/hub"]
 # or
 # B) Conditionally copy a local pre-cache from the build context to the 
 #    container's cache via the HF_PRECACHE_DIR build-arg.
 #    WARNING: This will copy ALL files in the pre-cache location.
 # Conditionally copy a cache directory if provided
 RUN if [ -n "$HF_PRECACHE_DIR" ]; then \
      echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \
      mkdir -p /root/.cache/huggingface/hub && \
      cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \
    else \
      echo "No local Hugging Face cache specified, skipping copy"; \
    fi
 # Conditionally copy a Hugging Face token if provided. Useful for Diart backend (pyannote audio models)
 RUN if [ -n "$HF_TKN_FILE" ]; then \
      echo "Copying Hugging Face token from $HF_TKN_FILE"; \
      mkdir -p /root/.cache/huggingface && \
      cp $HF_TKN_FILE /root/.cache/huggingface/token; \
    else \
      echo "No Hugging Face token file specified, skipping token setup"; \
    fi
 EXPOSE 8000
 ENV PATH="/app/.venv/bin:$PATH"
 ENV UV_PYTHON_DOWNLOADS=0
 HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
-    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
+  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@ -1,64 +1,76 @@
-FROM python:3.13-slim
+FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin
 # --- MARK: Builder Stage
 FROM debian:bookworm-slim AS builder-cpu
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 WORKDIR /app
-ARG EXTRAS
+RUN apt-get update && \
-ARG HF_PRECACHE_DIR
+  apt-get install -y --no-install-recommends \
-ARG HF_TKN_FILE
+  build-essential \
  python3-dev && \
  rm -rf /var/lib/apt/lists/*
 # Install UV and set up the environment 
 COPY --from=uvbin /uv /uvx /bin/
 ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1
 ENV UV_PYTHON_PREFERENCE=only-managed
 ENV UV_PYTHON_INSTALL_DIR=/python
 RUN uv python install 3.12
 # Install dependencies first to leverage caching: only pyproject.toml and uv.lock are
 # copied before this sync, so source-only changes do not invalidate the layer.
 # EXTRAS is a comma-separated list of extras (build arg, defaults to cpu).
 ARG EXTRAS=cpu
 COPY pyproject.toml uv.lock /app/
 # Rebuild the positional parameters as `--extra <name>` pairs, one per EXTRAS entry,
 # then sync from the lockfile (--frozen) without installing the project itself.
 RUN set -eux; \
  set --; \
  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
  set -- "$@" --extra "$extra"; \
  done; \
  uv sync --frozen --no-install-project --no-editable --no-cache "$@"
 # Copy the source code and install the package only
 COPY whisperlivekit /app/whisperlivekit
 # Same extras as above; this sync omits --no-install-project, so the project is
 # installed as well (non-editable, per --no-editable).
 RUN set -eux; \
  set --; \
  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
  set -- "$@" --extra "$extra"; \
  done; \
  uv sync --frozen --no-editable --no-cache "$@"
 # --- MARK: Runtime Stage 
 FROM debian:bookworm-slim
 ENV DEBIAN_FRONTEND=noninteractive
 WORKDIR /app
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
+  apt-get install -y --no-install-recommends \
-        ffmpeg \
+  ffmpeg &&\
-        git \
+  rm -rf /var/lib/apt/lists/*
        build-essential \
        python3-dev && \
    rm -rf /var/lib/apt/lists/*
-# Install CPU-only PyTorch
+# Copy UV binaries
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+COPY --from=uvbin /uv /uvx /bin/
-COPY . .
+# Copy the Python version
 # No stage in this file creates a `python` user or group, so a named --chown cannot be
 # resolved in the runtime image; copy the uv-managed interpreter with default ownership.
 COPY --from=builder-cpu /python /python
-# Install WhisperLiveKit directly, allowing for optional dependencies
+# Copy the virtual environment with all dependencies installed
-RUN if [ -n "$EXTRAS" ]; then \
+COPY --from=builder-cpu /app/.venv /app/.venv
      echo "Installing with extras: [$EXTRAS]"; \
      pip install --no-cache-dir whisperlivekit[$EXTRAS]; \
    else \
      echo "Installing base package only"; \
      pip install --no-cache-dir whisperlivekit; \
    fi
 # Enable in-container caching for Hugging Face models
 VOLUME ["/root/.cache/huggingface/hub"]
 # Conditionally copy a local pre-cache from the build context
 RUN if [ -n "$HF_PRECACHE_DIR" ]; then \
      echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \
      mkdir -p /root/.cache/huggingface/hub && \
      cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \
    else \
      echo "No local Hugging Face cache specified, skipping copy"; \
    fi
 # Conditionally copy a Hugging Face token if provided
 RUN if [ -n "$HF_TKN_FILE" ]; then \
      echo "Copying Hugging Face token from $HF_TKN_FILE"; \
      mkdir -p /root/.cache/huggingface && \
      cp $HF_TKN_FILE /root/.cache/huggingface/token; \
    else \
      echo "No Hugging Face token file specified, skipping token setup"; \
    fi
 # Expose port for the transcription server
 EXPOSE 8000
 ENV PATH="/app/.venv/bin:$PATH"
 ENV UV_PYTHON_DOWNLOADS=0
 HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
-    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
+  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]
 # Default args - you might want to use a smaller model for CPU
-CMD ["--model", "tiny"]
+CMD ["--model", "tiny"]
--- a/compose.yml
+++ b/compose.yml
@ -0,0 +1,52 @@
 # WhisperLiveKit services. Each one builds an image with a different EXTRAS set and
 # publishes the container's port 8000 on a host port that can be overridden through
 # the environment. All services share the named `hf-cache` volume.
 #
 # NOTE: wlk-gpu-sortformer and wlk-cpu both default to host port 8000. To run them at
 # the same time, set GPU_SORTFORMER_PORT or CPU_PORT to a free port.
 services:
  # GPU image with the Sortformer diarization extra.
  wlk-gpu-sortformer:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        EXTRAS: ${GPU_SORTFORMER_EXTRAS:-gpu-cu129,diarization-sortformer}
    image: wlk:gpu-sortformer
    gpus: all
    ports:
      - "${GPU_SORTFORMER_PORT:-8000}:8000"
    volumes:
      - hf-cache:/root/.cache/huggingface/hub
      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
    environment:
      # Passed through from the host environment when set.
      - HF_TOKEN
    command: ["--model", "medium", "--diarization", "--pcm-input"]
  # GPU image with the Voxtral backend and translation extras.
  wlk-gpu-voxtral:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        EXTRAS: ${GPU_VOXTRAL_EXTRAS:-gpu-cu129,voxtral-hf,translation}
    image: wlk:gpu-voxtral
    gpus: all
    ports:
      - "${GPU_VOXTRAL_PORT:-8001}:8000"
    volumes:
      - hf-cache:/root/.cache/huggingface/hub
      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
    environment:
      # Passed through from the host environment when set.
      - HF_TOKEN
    command: ["--backend", "voxtral", "--pcm-input"]
  # CPU image; no `command` is set, so the image's default arguments apply.
  wlk-cpu:
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        EXTRAS: ${CPU_EXTRAS:-cpu,diarization-diart,translation}
    image: wlk:cpu
    ports:
      - "${CPU_PORT:-8000}:8000"
    volumes:
      - hf-cache:/root/.cache/huggingface/hub
      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
    environment:
      # Passed through from the host environment when set.
      - HF_TOKEN
 volumes:
  # Named volume holding the Hugging Face hub cache across containers and restarts.
  hf-cache: