diff --git a/setup.py b/setup.py index a47a1f8..b5f9767 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,6 @@ setup( }, package_data={ 'whisperlivekit': ['web/*.html'], - 'whisperlivekit.simul_whisper': ['dual_license_simulstreaming.md'], 'whisperlivekit.simul_whisper.whisper.assets': ['*.tiktoken', '*.npz'], }, entry_points={ diff --git a/whisperlivekit/simul_whisper/dual_license_simulstreaming.md b/whisperlivekit/simul_whisper/dual_license_simulstreaming.md deleted file mode 100644 index ee28dd2..0000000 --- a/whisperlivekit/simul_whisper/dual_license_simulstreaming.md +++ /dev/null @@ -1,25 +0,0 @@ -📄 SimulStreaming (https://github.com/ufal/SimulStreaming) Licence - -SimulStreaming is dual-licensed: - -🔹 Non-Commercial Use - -You may use SimulStreaming under the **PolyForm Noncommercial License 1.0.0** if you -obtain the code through the GitHub repository. This license is **free of charge** -and comes with **no obligations** for non-commercial users. - -🔸 Commercial Use - -Understanding who uses SimulStreaming commercially helps us improve and -prioritize development. Therefore, we want to **require registration** of those who acquire a commercial licence. - -We plan to make the commercial licenceses **affordable** to SMEs and individuals. We -are considering to provide commercial licenses either for free or for symbolic -one-time fee, and maybe also provide additional support. You can share your preference via the [questionnaire](https://forms.cloud.microsoft/e/7tCxb4gJfB). - -You can also leave your contact [there](https://forms.cloud.microsoft/e/7tCxb4gJfB) to be notified when the commercial licenses become -available. 
- -✉️ Contact - -[Dominik Macháček](https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz \ No newline at end of file diff --git a/whisperlivekit/simul_whisper/license_simulstreaming.py b/whisperlivekit/simul_whisper/license_simulstreaming.py new file mode 100644 index 0000000..e606efa --- /dev/null +++ b/whisperlivekit/simul_whisper/license_simulstreaming.py @@ -0,0 +1,18 @@ +SIMULSTREAMING_LICENSE = f""" +{"*"*80} +SimulStreaming (https://github.com/ufal/SimulStreaming) is dual-licensed: + +🔹 Non-Commercial Use +You may use SimulStreaming under the PolyForm Noncommercial License 1.0.0 if you obtain the code through the GitHub repository. This license is free of charge and comes with no obligations for non-commercial users. + +🔸 Commercial Use +Understanding who uses SimulStreaming commercially helps us improve and +prioritize development. Therefore, we want to require registration of those who acquire a commercial licence. +We plan to make the commercial licenceses affordable to SMEs and individuals. We are considering to provide commercial licenses either for free or for symbolic one-time fee, and maybe also provide additional support. You can share your preference via the questionnaire https://forms.cloud.microsoft/e/7tCxb4gJfB. +You can also leave your contact there: https://forms.cloud.microsoft/e/7tCxb4gJfB to be notified when the commercial licenses become +available. 
+ +✉️ Contact +Dominik Macháček (https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz +{"*"*80} +""" \ No newline at end of file diff --git a/whisperlivekit/simul_whisper/whisper/__init__.py b/whisperlivekit/simul_whisper/whisper/__init__.py index e210718..f06dd6c 100644 --- a/whisperlivekit/simul_whisper/whisper/__init__.py +++ b/whisperlivekit/simul_whisper/whisper/__init__.py @@ -4,6 +4,7 @@ import os import urllib import warnings from typing import List, Optional, Union +import logging import torch from tqdm import tqdm @@ -14,6 +15,8 @@ from .model import ModelDimensions, Whisper from .transcribe import transcribe from .version import __version__ +logger = logging.getLogger(__name__) + _MODELS = { "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt", "tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt", @@ -71,6 +74,7 @@ def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]: ) with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + logger.info(f'Downloading model weights to {download_target}') with tqdm( total=int(source.info().get("Content-Length")), ncols=80, diff --git a/whisperlivekit/whisper_streaming_custom/backends.py b/whisperlivekit/whisper_streaming_custom/backends.py index 7af0cc4..eea017d 100644 --- a/whisperlivekit/whisper_streaming_custom/backends.py +++ b/whisperlivekit/whisper_streaming_custom/backends.py @@ -10,7 +10,7 @@ except ImportError: from typing import List import numpy as np from whisperlivekit.timed_objects import ASRToken - +from whisperlivekit.simul_whisper.license_simulstreaming import SIMULSTREAMING_LICENSE logger = logging.getLogger(__name__) SIMULSTREAMING_ERROR_AND_INSTALLATION_INSTRUCTIONS = ImportError( """SimulStreaming dependencies are not available. 
@@ -319,8 +319,7 @@ class SimulStreamingASR(ASRBase): def __init__(self, lan, modelsize=None, cache_dir=None, model_dir=None, logfile=sys.stderr, **kwargs): if not SIMULSTREAMING_AVAILABLE: raise SIMULSTREAMING_ERROR_AND_INSTALLATION_INSTRUCTIONS - with open("whisperlivekit/simul_whisper/dual_license_simulstreaming.md", "r") as f: - print("*"*80 + f.read() + "*"*80) + logger.warning(SIMULSTREAMING_LICENSE) self.logfile = logfile self.transcribe_kargs = {} self.original_language = None if lan == "auto" else lan @@ -482,9 +481,9 @@ class SimulStreamingASR(ASRBase): try: if isinstance(audio, np.ndarray): audio = torch.from_numpy(audio).float() self.model.insert_audio(audio) self.model.infer(True) self.model.refresh_segment(complete=True) logger.info("SimulStreaming model warmed up successfully") except Exception as e: - logger.warning(f"SimulStreaming warmup failed: {e}") + logger.exception(f"SimulStreaming warmup failed: {e}")