diff --git a/README.md b/README.md index 82685ff..78b5d65 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,12 @@ pip install tokenize_uk # If you work with Ukrainian text # If you want to use diarization pip install diart + +# Optional backends. Default is faster-whisper +pip install whisperlivekit[whisper] # Original Whisper backend +pip install whisperlivekit[whisper-timestamped] # Whisper with improved timestamps +pip install whisperlivekit[mlx-whisper] # Optimized for Apple Silicon +pip install whisperlivekit[openai] # OpenAI API backend ``` ### Get access to 🎹 pyannote models diff --git a/setup.py b/setup.py index b027e96..fff5d18 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,7 @@ from setuptools import setup, find_packages - setup( name="whisperlivekit", - version="0.1.2", + version="0.1.3", description="Real-time, Fully Local Whisper's Speech-to-Text and Speaker Diarization", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", @@ -22,6 +21,10 @@ setup( "diarization": ["diart"], "vac": ["torch"], "sentence": ["mosestokenizer", "wtpsplit"], + "whisper": ["openai-whisper"], # PyPI name of OpenAI Whisper; the "whisper" distribution is an unrelated package + "whisper-timestamped": ["whisper-timestamped"], + "mlx-whisper": ["mlx-whisper"], + "openai": ["openai"], }, package_data={ 'whisperlivekit': ['web/*.html'], diff --git a/whisperlivekit/core.py b/whisperlivekit/core.py index c0185d9..0182555 100644 --- a/whisperlivekit/core.py +++ b/whisperlivekit/core.py @@ -1,7 +1,7 @@ try: from whisperlivekit.whisper_streaming_custom.whisper_online import backend_factory, warmup_asr -except: - from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr +except ImportError: + from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr from argparse import Namespace, ArgumentParser def parse_args(): diff --git a/whisperlivekit/whisper_streaming_custom/backends.py b/whisperlivekit/whisper_streaming_custom/backends.py index 8f1090e..71890d5 100644 --- 
a/whisperlivekit/whisper_streaming_custom/backends.py +++ b/whisperlivekit/whisper_streaming_custom/backends.py @@ -3,7 +3,10 @@ import logging import io import soundfile as sf import math -import torch +try: + import torch +except ImportError: + torch = None from typing import List import numpy as np from whisperlivekit.timed_objects import ASRToken @@ -102,7 +105,7 @@ class FasterWhisperASR(ASRBase): model_size_or_path = modelsize else: raise ValueError("Either modelsize or model_dir must be set") - device = "cuda" if torch.cuda.is_available() else "cpu" + device = "cuda" if torch and torch.cuda.is_available() else "cpu" compute_type = "float16" if device == "cuda" else "float32" model = WhisperModel(