deps(diart): improve diart dependency tree. rename gpu-cu129 dependency group to cu129
This commit is contained in:
parent
27ca028479
commit
4bb58dc7aa
6 changed files with 1518 additions and 992 deletions
134
.github/workflows/support-matrix.yml
vendored
134
.github/workflows/support-matrix.yml
vendored
|
|
@ -1,134 +0,0 @@
|
||||||
name: Support Matrix
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
timeout_sec:
|
|
||||||
description: "Per-case timeout in seconds"
|
|
||||||
required: true
|
|
||||||
default: "300"
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
test-and-support-matrix:
|
|
||||||
name: |
|
|
||||||
${{ matrix.os }} | py${{ matrix.python-version }} | tests + support matrix
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
||||||
python-version: ["3.11", "3.12", "3.13"]
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Setup uv
|
|
||||||
uses: astral-sh/setup-uv@v6
|
|
||||||
|
|
||||||
- name: Install dependencies for tests
|
|
||||||
run: uv sync --extra test --python ${{ matrix.python-version }}
|
|
||||||
|
|
||||||
# - name: Run unit tests
|
|
||||||
# run: uv run pytest tests/ -v
|
|
||||||
|
|
||||||
- name: Run compatibility matrix
|
|
||||||
shell: bash
|
|
||||||
env:
|
|
||||||
MATRIX_PY: ${{ matrix.python-version }}
|
|
||||||
TIMEOUT_SEC: ${{ github.event.inputs.timeout_sec || '300' }}
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
base_port=8010
|
|
||||||
|
|
||||||
#######################################
# Launch the `wlk` server for one support-matrix scenario and wait until it
# answers an HTTP request on 127.0.0.1, then stop the server again.
# Globals:
#   base_port   (read/written) port handed to the server. It is advanced by
#               one as soon as a port is claimed, so a failed scenario never
#               causes the next one to reuse a possibly still-bound port.
#               Falls back to 8010 when unset.
#   TIMEOUT_SEC (read) startup budget in seconds (default 300). The server is
#               probed once every 2 seconds within that budget.
# Arguments:
#   $1  - scenario name, used only in log lines
#   $2+ - extra arguments forwarded verbatim to `uv run wlk`
# Outputs:
#   "[matrix] ..." progress/result lines on stdout; the reason for a failure
#   on stderr.
# Returns:
#   0 if the server answered, 1 if it did not become ready,
#   2 if TIMEOUT_SEC is not a non-negative integer.
#######################################
run_case() {
  local name="${1:?run_case: scenario name is required}"
  shift 1

  # TIMEOUT_SEC can originate from a free-form workflow input. Validate it
  # before it reaches arithmetic evaluation, which would otherwise evaluate
  # arbitrary expressions embedded in the string.
  local timeout="${TIMEOUT_SEC:-300}"
  if [[ ! "${timeout}" =~ ^[0-9]+$ ]]; then
    echo "[matrix] ${name}: TIMEOUT_SEC must be a non-negative integer, got '${timeout}'" >&2
    return 2
  fi

  # One probe per 2 seconds, rounded up, and always at least one probe.
  # The 10# prefix forces base 10 so values like "08" are not read as octal.
  local checks=$(( (10#${timeout} + 1) / 2 ))
  if (( checks < 1 )); then
    checks=1
  fi

  # Claim the port up front so every exit path leaves base_port advanced.
  local port="${base_port:-8010}"
  base_port=$((port + 1))

  echo "[matrix] scenario=${name}"

  uv run wlk \
    --host 127.0.0.1 \
    --port "${port}" \
    --warmup-file "" \
    --model tiny \
    "$@" &
  local server_pid=$!

  local ready=0
  local reason="no HTTP response within ${timeout}s"
  local i
  for ((i = 0; i < checks; i++)); do
    if python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:${port}/', timeout=2).read(1)" >/dev/null 2>&1; then
      ready=1
      break
    fi
    # Stop polling early when the server process is already gone.
    if ! kill -0 "${server_pid}" >/dev/null 2>&1; then
      reason="server process exited during startup"
      break
    fi
    sleep 2
  done

  # Single exit path: always stop and reap the server. Both commands are
  # best-effort because the process may have exited on its own already.
  kill "${server_pid}" >/dev/null 2>&1 || true
  wait "${server_pid}" >/dev/null 2>&1 || true

  if (( ready != 1 )); then
    echo "[matrix] ${name}: ${reason}" >&2
    echo "[matrix] ${name} failed (startup_not_ready)"
    return 1
  fi

  echo "[matrix] ${name} passed"
}
|
|
||||||
|
|
||||||
# FW profile: cpu + diart + sortformer + translation
|
|
||||||
export UV_PROJECT_ENVIRONMENT=".ci-envs/${{ matrix.os }}-py${{ matrix.python-version }}-fw"
|
|
||||||
uv sync --python "${MATRIX_PY}" --no-dev \
|
|
||||||
--extra cpu \
|
|
||||||
--extra diarization-diart \
|
|
||||||
--extra diarization-sortformer \
|
|
||||||
--extra translation
|
|
||||||
|
|
||||||
run_case \
|
|
||||||
"fw-diart-translation" \
|
|
||||||
--backend faster-whisper \
|
|
||||||
--diarization \
|
|
||||||
--diarization-backend diart \
|
|
||||||
--language en \
|
|
||||||
--target-language es
|
|
||||||
|
|
||||||
run_case \
|
|
||||||
"fw-sortformer-translation" \
|
|
||||||
--backend faster-whisper \
|
|
||||||
--diarization \
|
|
||||||
--diarization-backend sortformer \
|
|
||||||
--language en \
|
|
||||||
--target-language es
|
|
||||||
|
|
||||||
unset UV_PROJECT_ENVIRONMENT
|
|
||||||
|
|
||||||
# Voxtral profile: cpu + diart + voxtral-hf + translation
|
|
||||||
export UV_PROJECT_ENVIRONMENT=".ci-envs/${{ matrix.os }}-py${{ matrix.python-version }}-voxtral"
|
|
||||||
uv sync --python "${MATRIX_PY}" --no-dev \
|
|
||||||
--extra cpu \
|
|
||||||
--extra diarization-diart \
|
|
||||||
--extra voxtral-hf \
|
|
||||||
--extra translation
|
|
||||||
|
|
||||||
run_case \
|
|
||||||
"voxtral-diart-translation" \
|
|
||||||
--backend voxtral \
|
|
||||||
--diarization \
|
|
||||||
--diarization-backend diart \
|
|
||||||
--language en \
|
|
||||||
--target-language es
|
|
||||||
|
|
||||||
unset UV_PROJECT_ENVIRONMENT
|
|
||||||
|
|
@ -23,7 +23,7 @@ ENV UV_PYTHON_INSTALL_DIR=/python
|
||||||
RUN uv python install 3.12
|
RUN uv python install 3.12
|
||||||
|
|
||||||
# Install dependencies first to leverage caching
|
# Install dependencies first to leverage caching
|
||||||
ARG EXTRAS=gpu-cu129
|
ARG EXTRAS=cu129
|
||||||
COPY pyproject.toml uv.lock /app/
|
COPY pyproject.toml uv.lock /app/
|
||||||
RUN set -eux; \
|
RUN set -eux; \
|
||||||
set --; \
|
set --; \
|
||||||
|
|
|
||||||
10
README.md
10
README.md
|
|
@ -75,7 +75,7 @@ Go to `chrome-extension` for instructions.
|
||||||
| Feature | `uv sync` | `pip install -e` |
|
| Feature | `uv sync` | `pip install -e` |
|
||||||
|-----------|-------------|-------------|
|
|-----------|-------------|-------------|
|
||||||
| **CPU PyTorch stack** | `uv sync --extra cpu` | `pip install -e ".[cpu]"` |
|
| **CPU PyTorch stack** | `uv sync --extra cpu` | `pip install -e ".[cpu]"` |
|
||||||
| **CUDA 12.9 PyTorch stack** | `uv sync --extra gpu-cu129` | `pip install -e ".[gpu-cu129]"` |
|
| **CUDA 12.9 PyTorch stack** | `uv sync --extra cu129` | `pip install -e ".[cu129]"` |
|
||||||
| **Translation** | `uv sync --extra translation` | `pip install -e ".[translation]"` |
|
| **Translation** | `uv sync --extra translation` | `pip install -e ".[translation]"` |
|
||||||
| **Sentence tokenizer** | `uv sync --extra sentence_tokenizer` | `pip install -e ".[sentence_tokenizer]"` |
|
| **Sentence tokenizer** | `uv sync --extra sentence_tokenizer` | `pip install -e ".[sentence_tokenizer]"` |
|
||||||
| **Voxtral (HF backend)** | `uv sync --extra voxtral-hf` | `pip install -e ".[voxtral-hf]"` |
|
| **Voxtral (HF backend)** | `uv sync --extra voxtral-hf` | `pip install -e ".[voxtral-hf]"` |
|
||||||
|
|
@ -86,10 +86,10 @@ Supported GPU profiles:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Profile A: Sortformer diarization
|
# Profile A: Sortformer diarization
|
||||||
uv sync --extra gpu-cu129 --extra diarization-sortformer
|
uv sync --extra cu129 --extra diarization-sortformer
|
||||||
|
|
||||||
# Profile B: Voxtral HF + translation
|
# Profile B: Voxtral HF + translation
|
||||||
uv sync --extra gpu-cu129 --extra voxtral-hf --extra translation
|
uv sync --extra cu129 --extra voxtral-hf --extra translation
|
||||||
```
|
```
|
||||||
|
|
||||||
`voxtral-hf` and `diarization-sortformer` are intentionally incompatible extras and must be installed in separate environments.
|
`voxtral-hf` and `diarization-sortformer` are intentionally incompatible extras and must be installed in separate environments.
|
||||||
|
|
@ -322,8 +322,8 @@ docker compose up --build wlk-cpu
|
||||||
#### Customization
|
#### Customization
|
||||||
|
|
||||||
- `--build-arg` Options:
|
- `--build-arg` Options:
|
||||||
- `EXTRAS="gpu-cu129,diarization-sortformer"` - GPU Sortformer profile extras.
|
- `EXTRAS="cu129,diarization-sortformer"` - GPU Sortformer profile extras.
|
||||||
- `EXTRAS="gpu-cu129,voxtral-hf,translation"` - GPU Voxtral profile extras.
|
- `EXTRAS="cu129,voxtral-hf,translation"` - GPU Voxtral profile extras.
|
||||||
- `EXTRAS="cpu,diarization-diart,translation"` - CPU profile extras.
|
- `EXTRAS="cpu,diarization-diart,translation"` - CPU profile extras.
|
||||||
- Hugging Face cache + token are configured in `compose.yml` using a named volume and `HF_TKN_FILE` (default: `./token`).
|
- Hugging Face cache + token are configured in `compose.yml` using a named volume and `HF_TKN_FILE` (default: `./token`).
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ services:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
EXTRAS: ${GPU_SORTFORMER_EXTRAS:-gpu-cu129,diarization-sortformer}
|
EXTRAS: ${GPU_SORTFORMER_EXTRAS:-cu129,diarization-sortformer}
|
||||||
image: wlk:gpu-sortformer
|
image: wlk:gpu-sortformer
|
||||||
gpus: all
|
gpus: all
|
||||||
ports:
|
ports:
|
||||||
|
|
@ -21,7 +21,7 @@ services:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
EXTRAS: ${GPU_VOXTRAL_EXTRAS:-gpu-cu129,voxtral-hf,translation}
|
EXTRAS: ${GPU_VOXTRAL_EXTRAS:-cu129,voxtral-hf,translation}
|
||||||
image: wlk:gpu-voxtral
|
image: wlk:gpu-voxtral
|
||||||
gpus: all
|
gpus: all
|
||||||
ports:
|
ports:
|
||||||
|
|
|
||||||
|
|
@ -39,21 +39,31 @@ sentence_tokenizer = ["mosestokenizer", "wtpsplit"]
|
||||||
voxtral-hf = [
|
voxtral-hf = [
|
||||||
"transformers>=5.2.0; python_version >= '3.10'",
|
"transformers>=5.2.0; python_version >= '3.10'",
|
||||||
"mistral-common[audio]",
|
"mistral-common[audio]",
|
||||||
|
"accelerate>=0.12",
|
||||||
]
|
]
|
||||||
cpu = ["torch>=2.0.0", "torchaudio>=2.0.0"]
|
cpu = ["torch>=2.0.0", "torchaudio>=2.0.0"]
|
||||||
gpu-cu129 = [
|
cu129 = [
|
||||||
"torch>=2.0.0",
|
"torch>=2.0.0",
|
||||||
"torchaudio>=2.0.0",
|
"torchaudio>=2.0.0",
|
||||||
'triton>=2.0.0; platform_machine == "x86_64" and (sys_platform == "linux" or sys_platform == "linux2")',
|
'triton>=2.0.0; platform_machine == "x86_64" and (sys_platform == "linux" or sys_platform == "linux2")',
|
||||||
]
|
]
|
||||||
diarization-sortformer = ["nemo-toolkit[asr]>2.4; python_version >= '3.10'"]
|
diarization-sortformer = ["nemo-toolkit[asr]>2.4; python_version >= '3.10'"]
|
||||||
diarization-diart = ["diart"]
|
diarization-diart = [
|
||||||
|
"diart",
|
||||||
|
"torch<2.9.0",
|
||||||
|
"torchaudio<2.9.0",
|
||||||
|
"torchvision<0.24.0",
|
||||||
|
]
|
||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
conflicts = [
|
conflicts = [
|
||||||
[
|
[
|
||||||
{ extra = "cpu" },
|
{ extra = "cpu" },
|
||||||
{ extra = "gpu-cu129" },
|
{ extra = "cu129" },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ extra = "diarization-diart" },
|
||||||
|
{ extra = "cu129" },
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
{ extra = "voxtral-hf" },
|
{ extra = "voxtral-hf" },
|
||||||
|
|
@ -64,11 +74,16 @@ conflicts = [
|
||||||
[tool.uv.sources]
|
[tool.uv.sources]
|
||||||
torch = [
|
torch = [
|
||||||
{ index = "pytorch-cpu", extra = "cpu", marker = "platform_system != 'Darwin'" },
|
{ index = "pytorch-cpu", extra = "cpu", marker = "platform_system != 'Darwin'" },
|
||||||
{ index = "pytorch-cu129", extra = "gpu-cu129", marker = "platform_system == 'Linux' and platform_machine == 'x86_64'" },
|
{ index = "pytorch-cpu", extra = "diarization-diart", marker = "platform_system != 'Darwin'" },
|
||||||
|
{ index = "pytorch-cu129", extra = "cu129", marker = "platform_system == 'Linux' and platform_machine == 'x86_64'" },
|
||||||
]
|
]
|
||||||
torchaudio = [
|
torchaudio = [
|
||||||
{ index = "pytorch-cpu", extra = "cpu", marker = "platform_system != 'Darwin'" },
|
{ index = "pytorch-cpu", extra = "cpu", marker = "platform_system != 'Darwin'" },
|
||||||
{ index = "pytorch-cu129", extra = "gpu-cu129", marker = "platform_system == 'Linux' and platform_machine == 'x86_64'" },
|
{ index = "pytorch-cpu", extra = "diarization-diart", marker = "platform_system != 'Darwin'" },
|
||||||
|
{ index = "pytorch-cu129", extra = "cu129", marker = "platform_system == 'Linux' and platform_machine == 'x86_64'" },
|
||||||
|
]
|
||||||
|
torchvision = [
|
||||||
|
{ index = "pytorch-cpu", extra = "diarization-diart", marker = "platform_system != 'Darwin'" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[tool.uv.index]]
|
[[tool.uv.index]]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue