"""CLI entry point for WhisperLiveKit. Provides subcommands: wlk serve — Start the transcription server (default when no args) wlk listen — Live microphone transcription wlk run — Auto-pull model and start server wlk transcribe — Transcribe audio files offline wlk bench — Benchmark speed and accuracy on standard test audio wlk models — List available and installed backends/models wlk pull — Download a model for offline use wlk rm — Delete downloaded models wlk check — Verify system dependencies (ffmpeg, etc.) wlk diagnose — Run pipeline diagnostics on audio file """ import importlib.util import logging import platform import sys logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Backend detection # --------------------------------------------------------------------------- def _module_available(name: str) -> bool: return importlib.util.find_spec(name) is not None def _gpu_info() -> str: """Return a short string describing available accelerators.""" parts = [] try: import torch if torch.cuda.is_available(): name = torch.cuda.get_device_name(0) parts.append(f"CUDA ({name})") if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): parts.append("MPS (Apple Silicon)") except ImportError: pass if platform.system() == "Darwin" and platform.machine() == "arm64": if _module_available("mlx"): parts.append("MLX") return ", ".join(parts) if parts else "CPU only" BACKENDS = [ { "id": "faster-whisper", "name": "Faster Whisper", "module": "faster_whisper", "install": "pip install faster-whisper", "description": "CTranslate2-based Whisper (fast, CPU/CUDA)", "policy": "localagreement", "streaming": "chunk", # batch inference with LocalAgreement/SimulStreaming "devices": ["cpu", "cuda"], }, { "id": "whisper", "name": "OpenAI Whisper", "module": "whisper", "install": "pip install openai-whisper", "description": "Original OpenAI Whisper (PyTorch)", "policy": "simulstreaming", "streaming": "chunk", "devices": ["cpu", "cuda"], }, { "id": "mlx-whisper", "name": "MLX Whisper", "module": "mlx_whisper", "install": "pip install mlx-whisper", "description": "Apple Silicon native Whisper (MLX)", "policy": "localagreement", "platform": "darwin-arm64", "streaming": "chunk", "devices": ["mlx"], }, { "id": "voxtral-mlx", "name": "Voxtral MLX", "module": "mlx", "install": "pip install whisperlivekit[voxtral-mlx]", "description": "Mistral Voxtral Mini on Apple Silicon (MLX, native streaming)", "platform": "darwin-arm64", "streaming": "native", # truly streaming (token-by-token) "devices": ["mlx"], }, { "id": "voxtral", "name": "Voxtral HF", "module": "transformers", "install": "pip install whisperlivekit[voxtral-hf]", "description": "Mistral Voxtral Mini (HF Transformers, native streaming)", "streaming": "native", "devices": ["cuda", "mps", "cpu"], }, { "id": "qwen3", "name": "Qwen3 ASR", "module": "qwen_asr", "install": "pip install qwen-asr", "description": "Qwen3-ASR with ForcedAligner timestamps", "streaming": "chunk", "devices": ["cuda", "mps", "cpu"], }, { "id": "openai-api", "name": "OpenAI API", "module": "openai", "install": "pip install openai", "description": "Cloud-based transcription via OpenAI API", "streaming": "cloud", "devices": ["cloud"], }, ] # --------------------------------------------------------------------------- # Model catalog — maps "wlk pull " to download actions # --------------------------------------------------------------------------- # Whisper model sizes available across backends WHISPER_SIZES = [ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo", ] # Faster-Whisper uses Systran HuggingFace repos FASTER_WHISPER_REPOS = { "tiny": "Systran/faster-whisper-tiny", "tiny.en": "Systran/faster-whisper-tiny.en", "base": "Systran/faster-whisper-base", "base.en": "Systran/faster-whisper-base.en", "small": "Systran/faster-whisper-small", "small.en": "Systran/faster-whisper-small.en", "medium": "Systran/faster-whisper-medium", "medium.en": "Systran/faster-whisper-medium.en", "large-v1": "Systran/faster-whisper-large-v1", "large-v2": "Systran/faster-whisper-large-v2", "large-v3": "Systran/faster-whisper-large-v3", "large-v3-turbo": "Systran/faster-distil-whisper-large-v3", } # MLX Whisper repos from model_mapping.py MLX_WHISPER_REPOS = { "tiny.en": "mlx-community/whisper-tiny.en-mlx", "tiny": "mlx-community/whisper-tiny-mlx", "base.en": "mlx-community/whisper-base.en-mlx", "base": "mlx-community/whisper-base-mlx", "small.en": "mlx-community/whisper-small.en-mlx", "small": "mlx-community/whisper-small-mlx", "medium.en": "mlx-community/whisper-medium.en-mlx", "medium": "mlx-community/whisper-medium-mlx", "large-v1": "mlx-community/whisper-large-v1-mlx", "large-v2": "mlx-community/whisper-large-v2-mlx", "large-v3": "mlx-community/whisper-large-v3-mlx", "large-v3-turbo": "mlx-community/whisper-large-v3-turbo", "large": "mlx-community/whisper-large-mlx", } # Voxtral/Qwen3 model repos VOXTRAL_HF_REPO = "mistralai/Voxtral-Mini-4B-Realtime-2602" VOXTRAL_MLX_REPO = "mlx-community/Voxtral-Mini-4B-Realtime-6bit" QWEN3_REPOS = { "1.7b": "Qwen/Qwen3-ASR-1.7B", "0.6b": "Qwen/Qwen3-ASR-0.6B", } QWEN3_ALIGNER_REPO = "Qwen/Qwen3-ForcedAligner-0.6B" # Model catalog: metadata for display in `wlk models` # params = approximate parameter count, disk = approximate download size MODEL_CATALOG = [ # Whisper family (available across faster-whisper, mlx-whisper, whisper backends) {"name": "tiny", "family": "whisper", "params": "39M", "disk": "75 MB", "languages": 99, "quality": "low", "speed": "fastest"}, {"name": "tiny.en", "family": "whisper", "params": "39M", "disk": "75 MB", "languages": 1, "quality": "low", "speed": "fastest"}, {"name": "base", "family": "whisper", "params": "74M", "disk": "142 MB", "languages": 99, "quality": "fair", "speed": "fast"}, {"name": "base.en", "family": "whisper", "params": "74M", "disk": "142 MB", "languages": 1, "quality": "fair", "speed": "fast"}, {"name": "small", "family": "whisper", "params": "244M", "disk": "466 MB", "languages": 99, "quality": "good", "speed": "medium"}, {"name": "small.en", "family": "whisper", "params": "244M", "disk": "466 MB", "languages": 1, "quality": "good", "speed": "medium"}, {"name": "medium", "family": "whisper", "params": "769M", "disk": "1.5 GB", "languages": 99, "quality": "great", "speed": "slow"}, {"name": "medium.en", "family": "whisper", "params": "769M", "disk": "1.5 GB", "languages": 1, "quality": "great", "speed": "slow"}, {"name": "large-v3", "family": "whisper", "params": "1.5B", "disk": "3.1 GB", "languages": 99, "quality": "best", "speed": "slowest"}, {"name": "large-v3-turbo", "family": "whisper", "params": "809M", "disk": "1.6 GB", "languages": 99, "quality": "great", "speed": "medium"}, # Voxtral (native streaming, single model) {"name": "voxtral", "family": "voxtral", "params": "4B", "disk": "8.2 GB", "languages": 15, "quality": "great", "speed": "medium"}, {"name": "voxtral-mlx", "family": "voxtral", "params": "4B", "disk": "2.7 GB", "languages": 15, "quality": "great", "speed": "medium"}, # Qwen3 ASR {"name": "qwen3:1.7b", "family": "qwen3", "params": "1.7B", "disk": "3.6 GB", "languages": 12, "quality": "good", "speed": "fast"}, {"name": "qwen3:0.6b", "family": "qwen3", "params": "0.6B", "disk": "1.4 GB", "languages": 12, "quality": "fair", "speed": "fastest"}, ] def _check_platform(backend: dict) -> bool: """Check if backend is compatible with current platform.""" req = backend.get("platform") if req is None: return True if req == "darwin-arm64": return platform.system() == "Darwin" and platform.machine() == "arm64" return True def _is_installed(backend: dict) -> bool: return _module_available(backend["module"]) def _check_ffmpeg() -> bool: """Check if ffmpeg is available.""" import shutil return shutil.which("ffmpeg") is not None def _scan_downloaded_models() -> dict: """Scan HuggingFace and Whisper caches to find downloaded models. Returns: dict mapping repo_id → cached path (or True if found). """ found = {} # 1. Scan HuggingFace hub cache try: from huggingface_hub import scan_cache_dir cache_info = scan_cache_dir() for repo in cache_info.repos: found[repo.repo_id] = str(repo.repo_path) except Exception: pass # 2. Scan native Whisper cache (~/.cache/whisper) import os whisper_cache = os.path.join(os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache")), "whisper") if os.path.isdir(whisper_cache): for f in os.listdir(whisper_cache): if f.endswith(".pt"): # e.g. "base.pt" or "large-v3.pt" size = f.rsplit(".", 1)[0] found[f"openai/whisper-{size}"] = os.path.join(whisper_cache, f) return found # --------------------------------------------------------------------------- # Startup banner # --------------------------------------------------------------------------- def print_banner(config, host: str, port: int, ssl: bool = False): """Print a clean startup banner with server info.""" protocol = "https" if ssl else "http" ws_protocol = "wss" if ssl else "ws" # Resolve display host display_host = host if host not in ("0.0.0.0", "::") else "localhost" base_url = f"{protocol}://{display_host}:{port}" ws_url = f"{ws_protocol}://{display_host}:{port}" backend = getattr(config, "backend", "auto") model = getattr(config, "model_size", "base") language = getattr(config, "lan", "auto") # Resolve actual backend name backend_label = backend if backend == "auto": backend_label = "auto (will resolve on first request)" lines = [ "", " WhisperLiveKit", f" Backend: {backend_label} | Model: {model} | Language: {language}", f" Accelerator: {_gpu_info()}", "", f" Web UI: {base_url}/", f" WebSocket: {ws_url}/asr", f" Deepgram: {ws_url}/v1/listen", f" REST API: {base_url}/v1/audio/transcriptions", f" Models: {base_url}/v1/models", f" Health: {base_url}/health", "", ] print("\n".join(lines), file=sys.stderr) # --------------------------------------------------------------------------- # `wlk models` subcommand # --------------------------------------------------------------------------- def _model_is_downloaded(model_entry: dict, downloaded: dict) -> bool: """Check if a model catalog entry has been downloaded.""" name = model_entry["name"] family = model_entry["family"] if family == "whisper": # Check all whisper backends repos = [ FASTER_WHISPER_REPOS.get(name), MLX_WHISPER_REPOS.get(name), f"openai/whisper-{name}", ] return any(r in downloaded for r in repos if r) elif name == "voxtral": return VOXTRAL_HF_REPO in downloaded elif name == "voxtral-mlx": return VOXTRAL_MLX_REPO in downloaded elif family == "qwen3": size = name.split(":")[1] if ":" in name else "1.7b" return QWEN3_REPOS.get(size, "") in downloaded return False def _best_backend_for_model(model_entry: dict) -> str: """Suggest the best available backend for a model.""" family = model_entry["family"] is_apple = platform.system() == "Darwin" and platform.machine() == "arm64" if family == "voxtral": if "mlx" in model_entry["name"]: return "voxtral-mlx" return "voxtral" elif family == "qwen3": return "qwen3" elif family == "whisper": if is_apple and _module_available("mlx_whisper"): return "mlx-whisper" if _module_available("faster_whisper"): return "faster-whisper" if _module_available("whisper"): return "whisper" # Suggest best installable return "mlx-whisper" if is_apple else "faster-whisper" return "auto" def cmd_models(): """List available models and backends (ollama-style).""" is_apple_silicon = platform.system() == "Darwin" and platform.machine() == "arm64" downloaded = _scan_downloaded_models() # --- Installed backends --- print("\n Backends:\n") max_name = max(len(b["name"]) for b in BACKENDS) for b in BACKENDS: compatible = _check_platform(b) installed = _is_installed(b) streaming = b.get("streaming", "chunk") stream_label = {"native": "streaming", "chunk": "chunked", "cloud": "cloud"}.get(streaming, streaming) if installed: status = "\033[32m+\033[0m" elif not compatible: status = "\033[90m-\033[0m" else: status = "\033[33m-\033[0m" name_pad = b["name"].ljust(max_name) desc_short = b["description"] print(f" {status} {name_pad} {desc_short} [{stream_label}]") if not installed and compatible: print(f" {''.ljust(max_name)} \033[90m{b['install']}\033[0m") # --- System info --- print(f"\n Platform: {platform.system()} {platform.machine()}") print(f" Accelerator: {_gpu_info()}") print(f" ffmpeg: {'found' if _check_ffmpeg() else '\033[31mNOT FOUND\033[0m (required)'}") # --- Model catalog --- print("\n Models:\n") # Table header hdr = f" {'NAME':<20} {'PARAMS':>7} {'SIZE':>8} {'QUALITY':<8} {'SPEED':<8} {'LANGS':>5} {'STATUS':<10}" print(hdr) print(f" {'─' * 20} {'─' * 7} {'─' * 8} {'─' * 8} {'─' * 8} {'─' * 5} {'─' * 10}") for m in MODEL_CATALOG: name = m["name"] # Skip platform-incompatible models if name == "voxtral-mlx" and not is_apple_silicon: continue is_dl = _model_is_downloaded(m, downloaded) if is_dl: status = "\033[32mpulled\033[0m " else: status = "\033[90mavailable\033[0m " langs = str(m["languages"]) if m["languages"] < 99 else "99+" print( f" {name:<20} {m['params']:>7} {m['disk']:>8} " f"{m['quality']:<8} {m['speed']:<8} {langs:>5} {status}" ) # --- Quick start --- print(f"\n Quick start:\n") if is_apple_silicon: print(" wlk run voxtral-mlx # Best streaming on Apple Silicon") print(" wlk run large-v3-turbo # Best quality/speed balance") else: print(" wlk run large-v3-turbo # Best quality/speed balance") print(" wlk run voxtral # Native streaming (CUDA/CPU)") print(" wlk pull base # Download smallest multilingual model") print(" wlk transcribe audio.mp3 # Offline transcription") print() # --------------------------------------------------------------------------- # `wlk pull` subcommand # --------------------------------------------------------------------------- def _hf_download(repo_id: str, label: str): """Download a HuggingFace model repo to the local cache.""" from huggingface_hub import snapshot_download print(f" Downloading {label} ({repo_id})...") path = snapshot_download(repo_id) print(f" Saved to: {path}") return path def _resolve_pull_target(spec: str): """Parse a pull spec like 'faster-whisper:large-v3' or 'base' into (backend, size/repo). Returns: list of (backend_id, repo_id, label) tuples to download. """ targets = [] # Check for backend:size format if ":" in spec: backend_part, size_part = spec.split(":", 1) else: backend_part = None size_part = spec # Handle voxtral if size_part == "voxtral" or backend_part == "voxtral": targets.append(("voxtral", VOXTRAL_HF_REPO, "Voxtral Mini (HF)")) return targets if size_part == "voxtral-mlx" or backend_part == "voxtral-mlx": targets.append(("voxtral-mlx", VOXTRAL_MLX_REPO, "Voxtral Mini (MLX)")) return targets # Handle qwen3 if backend_part == "qwen3" or size_part.startswith("qwen3"): qwen_size = size_part.split(":")[-1] if ":" in spec else "1.7b" if qwen_size.startswith("qwen3"): qwen_size = "1.7b" # default repo = QWEN3_REPOS.get(qwen_size) if not repo: print(f" Unknown Qwen3 size: {qwen_size}. Available: {', '.join(QWEN3_REPOS.keys())}") return [] targets.append(("qwen3", repo, f"Qwen3-ASR {qwen_size}")) targets.append(("qwen3-aligner", QWEN3_ALIGNER_REPO, "Qwen3 ForcedAligner")) return targets # Handle whisper-family models with optional backend prefix if backend_part: # Specific backend requested if backend_part == "faster-whisper": repo = FASTER_WHISPER_REPOS.get(size_part) if not repo: print(f" Unknown size: {size_part}. Available: {', '.join(FASTER_WHISPER_REPOS.keys())}") return [] targets.append(("faster-whisper", repo, f"Faster Whisper {size_part}")) elif backend_part == "mlx-whisper": repo = MLX_WHISPER_REPOS.get(size_part) if not repo: print(f" Unknown size: {size_part}. Available: {', '.join(MLX_WHISPER_REPOS.keys())}") return [] targets.append(("mlx-whisper", repo, f"MLX Whisper {size_part}")) elif backend_part == "whisper": # OpenAI whisper downloads on first use; we can at least pull HF version targets.append(("whisper", f"openai/whisper-{size_part}", f"Whisper {size_part}")) else: print(f" Unknown backend: {backend_part}") return [] else: # No backend specified — download for the best available backend is_apple = platform.system() == "Darwin" and platform.machine() == "arm64" if size_part in WHISPER_SIZES: if is_apple and _module_available("mlx_whisper"): repo = MLX_WHISPER_REPOS.get(size_part) if repo: targets.append(("mlx-whisper", repo, f"MLX Whisper {size_part}")) if _module_available("faster_whisper"): repo = FASTER_WHISPER_REPOS.get(size_part) if repo: targets.append(("faster-whisper", repo, f"Faster Whisper {size_part}")) if not targets: # Fallback: download for any available backend repo = FASTER_WHISPER_REPOS.get(size_part) if repo: targets.append(("faster-whisper", repo, f"Faster Whisper {size_part}")) else: print(f" Unknown model: {spec}") print(f" Available sizes: {', '.join(WHISPER_SIZES)}") print(" Other models: voxtral, voxtral-mlx, qwen3:1.7b, qwen3:0.6b") return [] return targets def cmd_pull(spec: str): """Download a model for offline use.""" targets = _resolve_pull_target(spec) if not targets: return 1 print(f"\n Pulling model: {spec}\n") for backend_id, repo_id, label in targets: try: _hf_download(repo_id, label) except Exception as e: print(f" Failed to download {label}: {e}") return 1 print("\n Done. Model ready for offline use.") print() return 0 # --------------------------------------------------------------------------- # `wlk transcribe` subcommand # --------------------------------------------------------------------------- def cmd_transcribe(args: list): """Transcribe audio files using the full pipeline, no server needed. Usage: wlk transcribe [options] [audio_file ...] """ import argparse parser = argparse.ArgumentParser( prog="wlk transcribe", description="Transcribe audio files offline using WhisperLiveKit.", ) parser.add_argument("files", nargs="+", help="Audio files to transcribe") parser.add_argument("--backend", default="auto", help="ASR backend (default: auto)") parser.add_argument("--model", default="base", dest="model_size", help="Model size (default: base)") parser.add_argument("--language", "--lan", default="auto", dest="lan", help="Language code (default: auto)") parser.add_argument("--format", default="text", choices=["text", "json", "srt", "vtt", "verbose_json"], help="Output format (default: text)") parser.add_argument("--output", "-o", default=None, help="Output file (default: stdout)") parser.add_argument("--diarization", action="store_true", help="Enable speaker diarization") parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed processing logs") parsed = parser.parse_args(args) import asyncio # Suppress noisy logging unless --verbose. # Must happen AFTER importing (some modules set levels at import time) # so we use a wrapper that silences after import. if not parsed.verbose: asyncio.run(_transcribe_files_quiet(parsed)) else: asyncio.run(_transcribe_files(parsed)) async def _transcribe_files_quiet(parsed): """Wrapper that silences logging after imports are done.""" import warnings warnings.filterwarnings("ignore") # Force root logger to ERROR — overrides any per-module settings logging.root.setLevel(logging.ERROR) for handler in logging.root.handlers: handler.setLevel(logging.ERROR) # Silence all known noisy loggers for name in list(logging.Logger.manager.loggerDict.keys()): logging.getLogger(name).setLevel(logging.ERROR) await _transcribe_files(parsed) async def _transcribe_files(parsed): """Run transcription on one or more audio files.""" import json as json_module from whisperlivekit.test_harness import TestHarness, load_audio_pcm results = [] for audio_path in parsed.files: print(f" Transcribing: {audio_path}", file=sys.stderr) kwargs = { "model_size": parsed.model_size, "lan": parsed.lan, "pcm_input": True, } if parsed.backend != "auto": kwargs["backend"] = parsed.backend if parsed.diarization: kwargs["diarization"] = True async with TestHarness(**kwargs) as h: await h.feed(audio_path, speed=0) await h.drain(5.0) result = await h.finish(timeout=120) duration = len(load_audio_pcm(audio_path)) / (16000 * 2) if parsed.format == "text": results.append(result.committed_text or result.text) elif parsed.format == "json": results.append(json_module.dumps({"text": result.committed_text or result.text})) elif parsed.format == "verbose_json": results.append(json_module.dumps({ "text": result.committed_text or result.text, "duration": round(duration, 2), "language": parsed.lan, "segments": [ { "text": line.get("text", ""), "start": line.get("start", "0:00:00"), "end": line.get("end", "0:00:00"), "speaker": line.get("speaker", 0), } for line in result.lines if line.get("text") and line.get("speaker", 0) != -2 ], }, indent=2)) elif parsed.format in ("srt", "vtt"): results.append(_format_subtitle(result, parsed.format)) # Output output_text = "\n".join(results) if parsed.output: with open(parsed.output, "w") as f: f.write(output_text) print(f" Output written to: {parsed.output}", file=sys.stderr) else: print(output_text) def _format_subtitle(result, fmt: str) -> str: """Format result as SRT or VTT subtitles.""" from whisperlivekit.test_harness import _parse_time lines_out = [] if fmt == "vtt": lines_out.append("WEBVTT\n") idx = 0 for line in result.lines: if line.get("speaker") == -2 or not line.get("text"): continue idx += 1 start = line.get("start", "0:00:00") end = line.get("end", "0:00:00") start_s = _parse_time(start) end_s = _parse_time(end) start_ts = _subtitle_timestamp(start_s, fmt) end_ts = _subtitle_timestamp(end_s, fmt) if fmt == "srt": lines_out.append(str(idx)) lines_out.append(f"{start_ts} --> {end_ts}") lines_out.append(line["text"]) lines_out.append("") return "\n".join(lines_out) def _subtitle_timestamp(seconds: float, fmt: str) -> str: """Format seconds as SRT or VTT timestamp.""" h = int(seconds // 3600) m = int((seconds % 3600) // 60) s = int(seconds % 60) ms = int(round((seconds % 1) * 1000)) sep = "," if fmt == "srt" else "." return f"{h:02d}:{m:02d}:{s:02d}{sep}{ms:03d}" # --------------------------------------------------------------------------- # `wlk bench` subcommand # --------------------------------------------------------------------------- def cmd_bench(args: list): """Benchmark the transcription pipeline on public test audio. Downloads samples from LibriSpeech, Multilingual LibriSpeech, FLEURS, and AMI on first run. Supports multilingual benchmarking across all available backends. Usage: wlk bench [options] """ import argparse parser = argparse.ArgumentParser( prog="wlk bench", description="Benchmark WhisperLiveKit on public test audio.", ) parser.add_argument("--backend", default="auto", help="ASR backend (default: auto-detect)") parser.add_argument("--model", default="base", dest="model_size", help="Model size (default: base)") parser.add_argument("--languages", "--lan", default=None, help="Comma-separated language codes, or 'all' (default: en)") parser.add_argument("--categories", default=None, help="Comma-separated categories: clean,noisy,multilingual,meeting") parser.add_argument("--quick", action="store_true", help="Quick mode: small subset for smoke tests") parser.add_argument("--json", default=None, dest="json_out", help="Export full report to JSON file") parser.add_argument("--transcriptions", action="store_true", help="Show hypothesis vs reference for each sample") parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed logs") parsed = parser.parse_args(args) # Parse languages languages = None if parsed.languages and parsed.languages != "all": languages = [l.strip() for l in parsed.languages.split(",")] elif parsed.languages is None: languages = ["en"] # default to English only categories = None if parsed.categories: categories = [c.strip() for c in parsed.categories.split(",")] import asyncio if not parsed.verbose: _suppress_logging() asyncio.run(_run_bench_new(parsed, languages, categories)) def _suppress_logging(): """Suppress noisy logs during benchmark.""" import warnings warnings.filterwarnings("ignore") logging.root.setLevel(logging.ERROR) for handler in logging.root.handlers: handler.setLevel(logging.ERROR) for name in list(logging.Logger.manager.loggerDict.keys()): logging.getLogger(name).setLevel(logging.ERROR) async def _run_bench_new(parsed, languages, categories): """Run the benchmark using the new benchmark module.""" from whisperlivekit.benchmark.report import print_report, print_transcriptions, write_json from whisperlivekit.benchmark.runner import BenchmarkRunner def on_progress(name, i, total): if name == "done": print(f"\r [{total}/{total}] Done.{' ' * 30}", file=sys.stderr) else: print(f"\r [{i + 1}/{total}] {name}...{' ' * 20}", end="", file=sys.stderr, flush=True) runner = BenchmarkRunner( backend=parsed.backend, model_size=parsed.model_size, languages=languages, categories=categories, quick=parsed.quick, on_progress=on_progress, ) print(f"\n Downloading benchmark samples (cached after first run)...", file=sys.stderr) report = await runner.run() print_report(report) if parsed.transcriptions: print_transcriptions(report) if parsed.json_out: write_json(report, parsed.json_out) print(f" Results exported to: {parsed.json_out}\n", file=sys.stderr) # --------------------------------------------------------------------------- # `wlk listen` subcommand # --------------------------------------------------------------------------- def cmd_listen(args: list): """Live microphone transcription. Usage: wlk listen [options] """ import argparse parser = argparse.ArgumentParser( prog="wlk listen", description="Transcribe live microphone input in real-time.", ) parser.add_argument("--backend", default="auto", help="ASR backend (default: auto)") parser.add_argument("--model", default="base", dest="model_size", help="Model size (default: base)") parser.add_argument("--language", "--lan", default="auto", dest="lan", help="Language code (default: auto)") parser.add_argument("--diarization", action="store_true", help="Enable speaker diarization") parser.add_argument("--output", "-o", default=None, help="Save transcription to file on exit") parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed logs") parsed = parser.parse_args(args) try: import sounddevice # noqa: F401 except ImportError: print("\n sounddevice is required for microphone input.", file=sys.stderr) print(" Install it with: pip install sounddevice\n", file=sys.stderr) sys.exit(1) import asyncio if not parsed.verbose: asyncio.run(_listen_quiet(parsed)) else: asyncio.run(_listen_main(parsed)) async def _listen_quiet(parsed): """Run listen with suppressed logging.""" import warnings warnings.filterwarnings("ignore") logging.root.setLevel(logging.ERROR) for handler in logging.root.handlers: handler.setLevel(logging.ERROR) for name in list(logging.Logger.manager.loggerDict.keys()): logging.getLogger(name).setLevel(logging.ERROR) await _listen_main(parsed) async def _listen_main(parsed): """Live microphone transcription loop.""" import numpy as np import sounddevice as sd from whisperlivekit.test_harness import TestHarness SAMPLE_RATE = 16000 BLOCK_SIZE = int(SAMPLE_RATE * 0.5) # 500ms chunks kwargs = { "model_size": parsed.model_size, "lan": parsed.lan, "pcm_input": True, } if parsed.backend != "auto": kwargs["backend"] = parsed.backend if parsed.diarization: kwargs["diarization"] = True out = sys.stderr out.write("\n Loading model...") out.flush() async with TestHarness(**kwargs) as h: out.write(" done.\n") out.write(" Listening (Ctrl+C to stop)\n\n") out.flush() n_lines_printed = 0 pipe_stdout = not sys.stdout.isatty() def on_state_update(state): nonlocal n_lines_printed speech = state.speech_lines buf = state.buffer_transcription.strip() # Clear the buffer line out.write("\r\033[K") # Print new committed lines while n_lines_printed < len(speech): text = speech[n_lines_printed].get("text", "") out.write(f" {text}\n") if pipe_stdout: sys.stdout.write(f"{text}\n") sys.stdout.flush() n_lines_printed += 1 # Show buffer (ephemeral, overwritten next update) if buf: out.write(f" \033[90m| {buf}\033[0m") out.flush() h.on_update(on_state_update) # Bridge sounddevice thread -> async event loop import asyncio feed_queue = asyncio.Queue() loop = asyncio.get_running_loop() def audio_callback(indata, frames, time_info, status): pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes() loop.call_soon_threadsafe(feed_queue.put_nowait, pcm) try: stream = sd.InputStream( samplerate=SAMPLE_RATE, channels=1, dtype="float32", blocksize=BLOCK_SIZE, callback=audio_callback, ) stream.start() except Exception as e: out.write(f"\n Could not open microphone: {e}\n") out.write(" Check that a microphone is connected and permissions are granted.\n\n") return try: while True: try: pcm_data = await asyncio.wait_for(feed_queue.get(), timeout=0.1) await h.feed_pcm(pcm_data, speed=0) except asyncio.TimeoutError: pass except KeyboardInterrupt: pass finally: stream.stop() stream.close() out.write("\r\033[K\n Finishing...\n") out.flush() result = await h.finish(timeout=30) # Print any remaining committed lines speech = result.speech_lines while n_lines_printed < len(speech): text = speech[n_lines_printed].get("text", "") out.write(f" {text}\n") if pipe_stdout: sys.stdout.write(f"{text}\n") sys.stdout.flush() n_lines_printed += 1 # Print remaining buffer buf = result.buffer_transcription.strip() if buf: out.write(f" {buf}\n") if pipe_stdout: sys.stdout.write(f"{buf}\n") sys.stdout.flush() out.write("\n") out.flush() if parsed.output: with open(parsed.output, "w") as f: f.write(result.text + "\n") out.write(f" Saved to: {parsed.output}\n\n") out.flush() # --------------------------------------------------------------------------- # `wlk run` subcommand # --------------------------------------------------------------------------- def _resolve_run_spec(spec: str): """Map a model spec to (backend, model_size). Returns (backend_id_or_None, model_size_or_None). """ if ":" in spec: backend_part, model_part = spec.split(":", 1) return backend_part, model_part backend_ids = {b["id"] for b in BACKENDS} if spec in backend_ids: return spec, None if spec == "voxtral-mlx": return "voxtral-mlx", None if spec in WHISPER_SIZES: return None, spec return None, spec def cmd_run(args: list): """Auto-pull model if needed and start the server. Usage: wlk run [model] [server options] """ import argparse parser = argparse.ArgumentParser( prog="wlk run", description="Download model (if needed) and start the transcription server.", ) parser.add_argument("model", nargs="?", default=None, help="Model spec (e.g., voxtral, large-v3, faster-whisper:base)") parsed, extra_args = parser.parse_known_args(args) backend_flag = None model_flag = None if parsed.model: backend_flag, model_flag = _resolve_run_spec(parsed.model) # Show what we resolved catalog_match = next( (m for m in MODEL_CATALOG if m["name"] == parsed.model), None, ) if catalog_match: print( f"\n Model: {catalog_match['name']} " f"({catalog_match['params']} params, {catalog_match['disk']})", file=sys.stderr, ) if backend_flag: print(f" Backend: {backend_flag}", file=sys.stderr) else: best = _best_backend_for_model(catalog_match) print(f" Backend: {best} (auto-detected)", file=sys.stderr) # Auto-pull if needed downloaded = _scan_downloaded_models() targets = _resolve_pull_target(parsed.model) need_pull = any(repo_id not in downloaded for _, repo_id, _ in targets) if need_pull and targets: print("\n Model not found locally. Downloading...\n", file=sys.stderr) result = cmd_pull(parsed.model) if result != 0: sys.exit(1) print(file=sys.stderr) # Build server argv server_argv = [sys.argv[0]] if backend_flag: server_argv.extend(["--backend", backend_flag]) if model_flag: server_argv.extend(["--model", model_flag]) server_argv.extend(extra_args) sys.argv = server_argv from whisperlivekit.basic_server import main as serve_main serve_main() # --------------------------------------------------------------------------- # `wlk rm` subcommand # --------------------------------------------------------------------------- def cmd_rm(spec: str): """Delete a downloaded model from the cache.""" targets = _resolve_pull_target(spec) if not targets: return 1 downloaded = _scan_downloaded_models() found_any = any(repo_id in downloaded for _, repo_id, _ in targets) if not found_any: print(f"\n Model '{spec}' is not downloaded.\n", file=sys.stderr) return 1 print(file=sys.stderr) for _, repo_id, label in targets: if repo_id not in downloaded: continue try: # Try HuggingFace cache first from huggingface_hub import scan_cache_dir cache_info = scan_cache_dir() deleted = False for repo in cache_info.repos: if repo.repo_id == repo_id: size_bytes = repo.size_on_disk size_str = f"{size_bytes / 1e9:.1f} GB" if size_bytes > 1e9 else f"{size_bytes / 1e6:.0f} MB" hashes = [rev.commit_hash for rev in repo.revisions] strategy = cache_info.delete_revisions(*hashes) print(f" Deleting {label} ({repo_id})...", file=sys.stderr) strategy.execute() print(f" Freed {size_str}", file=sys.stderr) deleted = True break if not deleted: # Native whisper cache — plain file import os path = downloaded.get(repo_id) if path and os.path.isfile(path): size = os.path.getsize(path) size_str = f"{size / 1e6:.0f} MB" os.remove(path) print(f" Deleted {label} ({path})", file=sys.stderr) print(f" Freed {size_str}", file=sys.stderr) except Exception as e: print(f" Failed to delete {label}: {e}", file=sys.stderr) return 1 print(file=sys.stderr) return 0 # --------------------------------------------------------------------------- # `wlk check` subcommand # --------------------------------------------------------------------------- def cmd_check(): """Verify system dependencies.""" print("\nSystem check:\n") checks = [ ("Python >= 3.11", sys.version_info >= (3, 11)), ("ffmpeg", _check_ffmpeg()), ("torch", _module_available("torch")), ("torchaudio", _module_available("torchaudio")), ("faster-whisper", _module_available("faster_whisper")), ("uvicorn", _module_available("uvicorn")), ("fastapi", _module_available("fastapi")), ] all_ok = True for name, ok in checks: icon = "\033[32m OK\033[0m" if ok else "\033[31m MISSING\033[0m" print(f" {icon} {name}") if not ok: all_ok = False print() if all_ok: print(" All dependencies OK. Ready to serve.") else: print(" Some dependencies are missing. Install them before running the server.") print() return 0 if all_ok else 1 # --------------------------------------------------------------------------- # `wlk diagnose` subcommand # --------------------------------------------------------------------------- def cmd_diagnose(args: list): """Run pipeline diagnostics on an audio file. Feeds audio through the full pipeline while probing internal backend state at regular intervals. Produces a timeline of what happened inside the pipeline, flags anomalies (stuck tokens, generate thread errors, etc.), and prints a pass/fail summary. Usage: wlk diagnose [audio_file] [options] """ import argparse parser = argparse.ArgumentParser( prog="wlk diagnose", description="Run pipeline diagnostics to debug transcription issues.", ) parser.add_argument("file", nargs="?", default=None, help="Audio file to diagnose (default: built-in test sample)") parser.add_argument("--backend", default="auto", help="ASR backend (default: auto)") parser.add_argument("--model", default="base", dest="model_size", help="Model size (default: base)") parser.add_argument("--language", "--lan", default="auto", dest="lan", help="Language code (default: auto)") parser.add_argument("--speed", type=float, default=1.0, help="Playback speed (1.0=realtime, 0=instant, default: 1.0)") parser.add_argument("--probe-interval", type=float, default=2.0, help="Seconds between state probes (default: 2.0)") parser.add_argument("--diarization", action="store_true", help="Enable speaker diarization") parsed = parser.parse_args(args) import asyncio asyncio.run(_diagnose_main(parsed)) def _probe_backend_state(processor) -> dict: """Probe internal state of whatever ASR backend is running. Returns a dict of diagnostic key-value pairs specific to the backend. """ info = {} transcription = processor.transcription if transcription is None: info["error"] = "no transcription processor" return info # Common: audio buffer size audio_buf = getattr(transcription, "audio_buffer", None) if audio_buf is not None: info["audio_buffer_samples"] = len(audio_buf) info["audio_buffer_sec"] = round(len(audio_buf) / 16000, 2) # Common: get_buffer result try: buf = transcription.get_buffer() info["buffer_text"] = buf.text if buf else "" except Exception as e: info["buffer_error"] = str(e) # Voxtral HF streaming specifics if hasattr(transcription, "_generate_started"): info["backend_type"] = "voxtral-hf-streaming" info["generate_started"] = transcription._generate_started info["generate_finished"] = transcription._generate_finished info["n_audio_tokens_fed"] = transcription._n_audio_tokens_fed info["n_text_tokens_received"] = transcription._n_text_tokens_received info["n_committed_words"] = transcription._n_committed_words info["pending_audio_samples"] = transcription._pending_len with transcription._text_lock: info["accumulated_text"] = transcription._get_accumulated_text() if transcription._generate_error: info["generate_error"] = str(transcription._generate_error) # Audio queue depth info["audio_queue_depth"] = transcription._audio_queue.qsize() # Voxtral MLX specifics elif hasattr(transcription, "_mlx_processor"): info["backend_type"] = "voxtral-mlx" # SimulStreaming specifics elif hasattr(transcription, "prev_output"): info["backend_type"] = "simulstreaming" info["prev_output_len"] = len(getattr(transcription, "prev_output", "") or "") # LocalAgreement (OnlineASRProcessor) specifics elif hasattr(transcription, "hypothesis_buffer"): info["backend_type"] = "localagreement" hb = transcription.hypothesis_buffer if hasattr(hb, "committed"): info["committed_words"] = len(hb.committed) if hasattr(hb, "buffer"): info["hypothesis_buffer_words"] = len(hb.buffer) else: info["backend_type"] = "unknown" return info def _probe_pipeline_state(processor) -> dict: """Probe pipeline-level state (queues, tasks, ffmpeg).""" info = {} if processor.transcription_queue: info["transcription_queue_size"] = processor.transcription_queue.qsize() if processor.diarization_queue: info["diarization_queue_size"] = processor.diarization_queue.qsize() if processor.translation_queue: info["translation_queue_size"] = processor.translation_queue.qsize() info["total_pcm_samples"] = processor.total_pcm_samples info["total_audio_sec"] = round(processor.total_pcm_samples / 16000, 2) info["is_stopping"] = processor.is_stopping info["in_silence"] = processor.current_silence is not None info["n_state_lines"] = len(processor.state.tokens) info["n_state_updates"] = len(getattr(processor.state, "new_tokens", [])) return info async def _diagnose_main(parsed): """Run the full diagnostic pipeline.""" import asyncio import time as time_module from whisperlivekit.test_harness import TestHarness, load_audio_pcm out = sys.stderr # Resolve audio file audio_path = parsed.file if audio_path is None: try: from whisperlivekit.test_data import get_samples samples = get_samples() # Prefer a sample matching the requested language lang_match = [s for s in samples if s.language == parsed.lan] sample = lang_match[0] if lang_match else samples[0] audio_path = sample.path out.write(f"\n Using test sample: {sample.name} ({sample.duration:.1f}s)\n") except Exception as e: out.write(f"\n No audio file provided and couldn't load test sample: {e}\n") out.write(" Usage: wlk diagnose [options]\n\n") sys.exit(1) # Load audio try: pcm = load_audio_pcm(audio_path) except Exception as e: out.write(f"\n Failed to load audio: {e}\n\n") sys.exit(1) audio_duration = len(pcm) / (16000 * 2) # Print header out.write(f"\n {'━' * 70}\n") out.write(" WhisperLiveKit Pipeline Diagnostic\n") out.write(f" {'━' * 70}\n\n") out.write(f" Audio: {audio_path}\n") out.write(f" Duration: {audio_duration:.1f}s\n") out.write(f" Backend: {parsed.backend}\n") out.write(f" Model: {parsed.model_size}\n") out.write(f" Language: {parsed.lan}\n") out.write(f" Speed: {parsed.speed}x\n") out.write(f" Probe every: {parsed.probe_interval}s\n") out.write(f" Platform: {platform.system()} {platform.machine()}\n") out.write(f" Accelerator: {_gpu_info()}\n") out.write(f"\n {'─' * 70}\n") out.write(" Loading model...\n") out.flush() kwargs = { "model_size": parsed.model_size, "lan": parsed.lan, "pcm_input": True, } if parsed.backend != "auto": kwargs["backend"] = parsed.backend if parsed.diarization: kwargs["diarization"] = True t_load_start = time_module.perf_counter() probes = [] anomalies = [] async with TestHarness(**kwargs) as h: t_load = time_module.perf_counter() - t_load_start out.write(f" Model loaded in {t_load:.1f}s\n") out.write(f" {'─' * 70}\n") out.write(" Feeding audio...\n\n") out.flush() processor = h._processor chunk_duration = 0.5 # seconds per chunk chunk_bytes = int(chunk_duration * 16000 * 2) offset = 0 t_start = time_module.perf_counter() last_probe = t_start probe_idx = 0 # Feed audio with periodic probes while offset < len(pcm): end = min(offset + chunk_bytes, len(pcm)) await processor.process_audio(pcm[offset:end]) chunk_seconds = (end - offset) / (16000 * 2) h._audio_position += chunk_seconds offset = end if parsed.speed > 0: await asyncio.sleep(chunk_duration / parsed.speed) # Probe at intervals now = time_module.perf_counter() if now - last_probe >= parsed.probe_interval: probe_idx += 1 elapsed = now - t_start audio_pos = h._audio_position backend_state = _probe_backend_state(processor) pipeline_state = _probe_pipeline_state(processor) harness_state = { "n_history": len(h.history), "state_text_len": len(h.state.text), "state_lines": len(h.state.lines), "state_speech_lines": len(h.state.speech_lines), "buffer": h.state.buffer_transcription[:80] if h.state.buffer_transcription else "", } probe = { "idx": probe_idx, "wall_time": round(elapsed, 1), "audio_pos": round(audio_pos, 1), "backend": backend_state, "pipeline": pipeline_state, "harness": harness_state, } probes.append(probe) # Print probe out.write(f" [{probe_idx:3d}] wall={elapsed:5.1f}s audio={audio_pos:5.1f}s") bt = backend_state.get("backend_type", "?") if bt == "voxtral-hf-streaming": out.write( f" | gen={'Y' if backend_state.get('generate_started') else 'N'}" f" fin={'Y' if backend_state.get('generate_finished') else 'N'}" f" audio_tok={backend_state.get('n_audio_tokens_fed', 0)}" f" text_tok={backend_state.get('n_text_tokens_received', 0)}" f" words={backend_state.get('n_committed_words', 0)}" f" q={backend_state.get('audio_queue_depth', 0)}" ) if backend_state.get("generate_error"): out.write(f" \033[31mERROR: {backend_state['generate_error']}\033[0m") elif bt == "localagreement": out.write( f" | committed={backend_state.get('committed_words', 0)}" f" buf_words={backend_state.get('hypothesis_buffer_words', 0)}" ) elif bt == "simulstreaming": out.write( f" | prev_out_len={backend_state.get('prev_output_len', 0)}" ) buf_text = backend_state.get("buffer_text", "") if buf_text: display = buf_text[:50] + ("..." if len(buf_text) > 50 else "") out.write(f'\n buf="{display}"') out.write("\n") out.flush() # Anomaly detection if bt == "voxtral-hf-streaming": if backend_state.get("generate_started") and not backend_state.get("generate_finished"): if backend_state.get("n_audio_tokens_fed", 0) > 10 and backend_state.get("n_text_tokens_received", 0) == 0: anomalies.append(f"[probe {probe_idx}] {backend_state['n_audio_tokens_fed']} audio tokens fed but 0 text tokens received — model may be stalled") if backend_state.get("generate_error"): anomalies.append(f"[probe {probe_idx}] Generate thread error: {backend_state['generate_error']}") if harness_state["n_history"] == 0 and elapsed > 5: anomalies.append(f"[probe {probe_idx}] No state updates after {elapsed:.0f}s — pipeline may be stuck") last_probe = now # Done feeding — drain and finish out.write(f"\n {'─' * 70}\n") out.write(" Audio feeding complete. Draining pipeline...\n") out.flush() await h.drain(3.0) # One more probe after drain backend_state = _probe_backend_state(processor) pipeline_state = _probe_pipeline_state(processor) probe_idx += 1 elapsed = time_module.perf_counter() - t_start out.write(f" [{probe_idx:3d}] wall={elapsed:5.1f}s audio={h._audio_position:5.1f}s (post-drain)\n") bt = backend_state.get("backend_type", "?") if bt == "voxtral-hf-streaming": out.write( f" text_tok={backend_state.get('n_text_tokens_received', 0)}" f" words={backend_state.get('n_committed_words', 0)}" f" accumulated_text_len={len(backend_state.get('accumulated_text', ''))}\n" ) result = await h.finish(timeout=60) t_total = time_module.perf_counter() - t_start # === Summary === out.write(f"\n {'━' * 70}\n") out.write(" Diagnostic Summary\n") out.write(f" {'━' * 70}\n\n") out.write(f" Wall time: {t_total:.1f}s\n") out.write(f" Audio duration: {audio_duration:.1f}s\n") rtf = t_total / audio_duration if audio_duration > 0 else 0 out.write(f" RTF: {rtf:.3f}x\n") out.write(f" Model load: {t_load:.1f}s\n") out.write(f" Probes taken: {probe_idx}\n\n") # Text output summary text = result.committed_text or result.text n_words = len(text.split()) if text.strip() else 0 n_lines = len(result.speech_lines) has_silence = result.has_silence out.write(f" Output words: {n_words}\n") out.write(f" Output lines: {n_lines}\n") out.write(f" Has silence: {has_silence}\n") out.write(f" Timing valid: {result.timing_valid}\n") out.write(f" Timing monotonic: {result.timing_monotonic}\n") timing_errors = result.timing_errors() if timing_errors: out.write("\n Timing errors:\n") for err in timing_errors[:10]: out.write(f" - {err}\n") # Transcription preview if text: preview = text[:200] + ("..." if len(text) > 200 else "") out.write(f'\n Transcription:\n "{preview}"\n') else: out.write("\n \033[31mNo transcription output!\033[0m\n") # Anomalies out.write(f"\n {'─' * 70}\n") if anomalies: out.write(f" \033[33mAnomalies detected ({len(anomalies)}):\033[0m\n") for a in anomalies: out.write(f" ⚠ {a}\n") else: out.write(" \033[32mNo anomalies detected.\033[0m\n") # Pass/fail checks out.write(f"\n {'─' * 70}\n") out.write(" Health checks:\n\n") checks = [ ("Model loaded successfully", t_load < 300), ("Audio processed without errors", not anomalies), ("Transcription produced output", n_words > 0), ("At least one committed line", n_lines > 0), ("Timestamps are valid", result.timing_valid), ("Timestamps are monotonic", result.timing_monotonic), ("RTF < 2.0x (faster than half real-time)", rtf < 2.0), ] all_pass = True for label, ok in checks: icon = "\033[32m PASS\033[0m" if ok else "\033[31m FAIL\033[0m" out.write(f" {icon} {label}\n") if not ok: all_pass = False out.write(f"\n {'━' * 70}\n") if all_pass: out.write(" \033[32mAll checks passed.\033[0m\n") else: out.write(" \033[31mSome checks failed. Review the timeline above for details.\033[0m\n") out.write(f" {'━' * 70}\n\n") # --------------------------------------------------------------------------- # Main entry point # --------------------------------------------------------------------------- def _print_version(): """Print version.""" from importlib.metadata import version try: v = version("whisperlivekit") except Exception: v = "dev" print(f"WhisperLiveKit {v}") def _print_help(): """Print top-level help.""" print(""" WhisperLiveKit — Local speech-to-text toolkit Usage: wlk [options] Commands: serve Start the transcription server (default) listen Live microphone transcription run Auto-pull model and start server transcribe Transcribe audio files offline bench Benchmark speed and accuracy diagnose Run pipeline diagnostics on audio models List available backends and models pull Download models for offline use rm Delete downloaded models check Verify system dependencies Examples: wlk # Start server with defaults wlk listen # Transcribe from microphone wlk listen --backend voxtral # Listen with specific backend wlk run voxtral # Auto-pull + start server wlk run large-v3 # Auto-pull + start server wlk transcribe audio.wav # Transcribe a file wlk transcribe --format srt audio.wav # Generate SRT subtitles wlk bench # Benchmark current backend wlk diagnose audio.wav --backend voxtral # Diagnose pipeline issues wlk models # List backends + models wlk pull large-v3 # Download model wlk rm large-v3 # Delete downloaded model wlk check # Check dependencies Run 'wlk --help' for command-specific help. """) def main(): """CLI entry point: routes to subcommands or defaults to 'serve'.""" # Quick subcommand routing before argparse (so `wlk models` works # without loading the full server stack) if len(sys.argv) >= 2: subcmd = sys.argv[1] if subcmd == "models": cmd_models() return if subcmd == "check": sys.exit(cmd_check()) if subcmd == "pull": if len(sys.argv) < 3: print("Usage: wlk pull ") print(" e.g.: wlk pull base, wlk pull faster-whisper:large-v3, wlk pull voxtral") sys.exit(1) sys.exit(cmd_pull(sys.argv[2])) if subcmd == "rm": if len(sys.argv) < 3: print("Usage: wlk rm ") print(" e.g.: wlk rm base, wlk rm voxtral") sys.exit(1) sys.exit(cmd_rm(sys.argv[2])) if subcmd == "transcribe": cmd_transcribe(sys.argv[2:]) return if subcmd == "bench": cmd_bench(sys.argv[2:]) return if subcmd == "listen": cmd_listen(sys.argv[2:]) return if subcmd == "diagnose": cmd_diagnose(sys.argv[2:]) return if subcmd == "run": cmd_run(sys.argv[2:]) return if subcmd in ("-h", "--help", "help"): _print_help() return if subcmd in ("version", "--version", "-V"): _print_version() return if subcmd == "serve": # Strip "serve" and pass remaining args to the server sys.argv = [sys.argv[0]] + sys.argv[2:] # Default: serve from whisperlivekit.basic_server import main as serve_main serve_main()