From cb5cf3933681e4e664c61e81c5012cc02e99dfb9 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Thu, 13 Mar 2025 15:03:16 +0100 Subject: [PATCH 1/2] fix #84 --- whisper_fastapi_online_server.py | 3 +- whisper_streaming_custom/whisper_online.py | 33 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/whisper_fastapi_online_server.py b/whisper_fastapi_online_server.py index 8a08381..f399fc4 100644 --- a/whisper_fastapi_online_server.py +++ b/whisper_fastapi_online_server.py @@ -10,7 +10,7 @@ from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.responses import HTMLResponse from fastapi.middleware.cors import CORSMiddleware -from whisper_streaming_custom.whisper_online import backend_factory, online_factory, add_shared_args +from whisper_streaming_custom.whisper_online import backend_factory, online_factory, add_shared_args,warmup_asr from timed_objects import ASRToken import math @@ -160,6 +160,7 @@ async def lifespan(app: FastAPI): global asr, tokenizer, diarization if args.transcription: asr, tokenizer = backend_factory(args) + warmup_asr(asr, args.warmup_file) else: asr, tokenizer = None, None diff --git a/whisper_streaming_custom/whisper_online.py b/whisper_streaming_custom/whisper_online.py index 147749f..ea52062 100644 --- a/whisper_streaming_custom/whisper_online.py +++ b/whisper_streaming_custom/whisper_online.py @@ -227,11 +227,34 @@ def asr_factory(args, logfile=sys.stderr): online = online_factory(args, asr, tokenizer, logfile=logfile) return asr, online -def set_logging(args, logger, others=[]): - logging.basicConfig(format="%(levelname)s\t%(message)s") # format='%(name)s - logger.setLevel(args.log_level) +def warmup_asr(asr, warmup_file=None): + """ + Warmup the ASR model by transcribing a short audio file. + """ + if warmup_file: + warmup_file = warmup_file + else: + # Download JFK sample if not already present + import tempfile + import os - for other in others: - logging.getLogger(other).setLevel(args.log_level) + + jfk_url = "https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav" + temp_dir = tempfile.gettempdir() + warmup_file = os.path.join(temp_dir, "whisper_warmup_jfk.wav") + + if not os.path.exists(warmup_file): + logger.debug(f"Downloading warmup file from {jfk_url}") + import urllib.request + urllib.request.urlretrieve(jfk_url, warmup_file) + # Load the warmup file + audio, sr = librosa.load(warmup_file, sr=16000) + + # Process the audio + asr.transcribe(audio) + + + logger.info("Whisper is warmed up") + From b768b219fef780c2ce5b1ce7cc6597623572ac8d Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Fri, 14 Mar 2025 11:41:18 +0100 Subject: [PATCH 2/2] Warmup functionality: add timeout option (for VM not connected to internet); False option to disable warmup --- whisper_fastapi_online_server.py | 9 +++- whisper_streaming_custom/whisper_online.py | 53 ++++++++++++++++------ 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/whisper_fastapi_online_server.py b/whisper_fastapi_online_server.py index f399fc4..7684705 100644 --- a/whisper_fastapi_online_server.py +++ b/whisper_fastapi_online_server.py @@ -10,7 +10,7 @@ from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.responses import HTMLResponse from fastapi.middleware.cors import CORSMiddleware -from whisper_streaming_custom.whisper_online import backend_factory, online_factory, add_shared_args,warmup_asr +from whisper_streaming_custom.whisper_online import backend_factory, online_factory, add_shared_args, warmup_asr from timed_objects import ASRToken import math @@ -42,8 +42,13 @@ parser.add_argument( parser.add_argument( "--warmup-file", type=str, + default=None, dest="warmup_file", - help="The path to a speech audio wav file to warm up Whisper so that the very first chunk processing is fast. It can be e.g. https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav .", + help=""" + The path to a speech audio wav file to warm up Whisper so that the very first chunk processing is fast. + If not set, uses https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav. + If False, no warmup is performed. + """, ) parser.add_argument( diff --git a/whisper_streaming_custom/whisper_online.py b/whisper_streaming_custom/whisper_online.py index ea52062..617f05b 100644 --- a/whisper_streaming_custom/whisper_online.py +++ b/whisper_streaming_custom/whisper_online.py @@ -227,34 +227,57 @@ def asr_factory(args, logfile=sys.stderr): online = online_factory(args, asr, tokenizer, logfile=logfile) return asr, online -def warmup_asr(asr, warmup_file=None): +def warmup_asr(asr, warmup_file=None, timeout=5): """ Warmup the ASR model by transcribing a short audio file. """ - if warmup_file: - warmup_file = warmup_file - else: + import os + import tempfile + + + if warmup_file is None: # Download JFK sample if not already present - import tempfile - import os - - jfk_url = "https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav" temp_dir = tempfile.gettempdir() warmup_file = os.path.join(temp_dir, "whisper_warmup_jfk.wav") if not os.path.exists(warmup_file): logger.debug(f"Downloading warmup file from {jfk_url}") + print(f"Downloading warmup file from {jfk_url}") + import time import urllib.request - urllib.request.urlretrieve(jfk_url, warmup_file) - - - # Load the warmup file - audio, sr = librosa.load(warmup_file, sr=16000) - + import urllib.error + import socket + + original_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + + start_time = time.time() + try: + urllib.request.urlretrieve(jfk_url, warmup_file) + logger.debug(f"Download successful in {time.time() - start_time:.2f}s") + except (urllib.error.URLError, socket.timeout) as e: + logger.warning(f"Download failed: {e}. Proceeding without warmup.") + return False + finally: + socket.setdefaulttimeout(original_timeout) + elif not warmup_file: + return False + + if not warmup_file or not os.path.exists(warmup_file) or os.path.getsize(warmup_file) == 0: + logger.warning(f"Warmup file {warmup_file} invalid or missing.") + return False + + print(f"Warmping up Whisper with {warmup_file}") + try: + import librosa + audio, sr = librosa.load(warmup_file, sr=16000) + except Exception as e: + logger.warning(f"Failed to load audio file: {e}") + return False + # Process the audio asr.transcribe(audio) - logger.info("Whisper is warmed up")