From 74c4dc791d8d3c99d393cd8abc7984cef700a272 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 4 Jan 2026 14:15:00 +0100 Subject: [PATCH] Lint scripts and tests --- audio_tests/generate_transcripts.py | 1 + run_benchmark.py | 1 - scripts/determine_alignment_heads.py | 6 +++--- scripts/sync_extension.py | 17 ++++++++--------- test_backend_offline.py | 9 +++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/audio_tests/generate_transcripts.py b/audio_tests/generate_transcripts.py index 7eb180f..6749a5c 100644 --- a/audio_tests/generate_transcripts.py +++ b/audio_tests/generate_transcripts.py @@ -6,6 +6,7 @@ Produces one JSON file per audio with: [{word, start, end}, ...] import json import os + from faster_whisper import WhisperModel AUDIO_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/run_benchmark.py b/run_benchmark.py index 5a4e23b..8c737fb 100644 --- a/run_benchmark.py +++ b/run_benchmark.py @@ -33,7 +33,6 @@ sys.path.insert(0, str(Path(__file__).parent)) from test_backend_offline import ( AUDIO_TESTS_DIR, SAMPLE_RATE, - TestResult, create_engine, discover_audio_files, download_sample_audio, diff --git a/scripts/determine_alignment_heads.py b/scripts/determine_alignment_heads.py index 5341853..3f3e4e3 100644 --- a/scripts/determine_alignment_heads.py +++ b/scripts/determine_alignment_heads.py @@ -8,7 +8,7 @@ import io import math import pathlib import sys -from typing import List, Optional, Sequence, Tuple, Union +from typing import Sequence, Tuple, Union import matplotlib.pyplot as plt import numpy as np @@ -24,7 +24,7 @@ sys.path.insert(0, str(REPO_ROOT)) sys.path.insert(0, str(WHISPER_ROOT)) from whisper import load_model -from whisper.audio import load_audio, log_mel_spectrogram, pad_or_trim +from whisper.audio import log_mel_spectrogram, pad_or_trim from whisper.tokenizer import get_tokenizer AudioInput = Union[str, pathlib.Path, np.ndarray, torch.Tensor] @@ -85,7 +85,7 @@ def _parse_args(): parser.add_argument( "--dataset-config", type=str, - default="clean" + default="clean" ) parser.add_argument( "--dataset-split", diff --git a/scripts/sync_extension.py b/scripts/sync_extension.py index 2e34cac..1eea448 100644 --- a/scripts/sync_extension.py +++ b/scripts/sync_extension.py @@ -1,40 +1,39 @@ """Copy core files from web directory to Chrome extension directory.""" -import os import shutil from pathlib import Path def sync_extension_files(): - + web_dir = Path("whisperlivekit/web") extension_dir = Path("chrome-extension") - + files_to_sync = [ "live_transcription.html", "live_transcription.js", "live_transcription.css" ] svg_files = [ "system_mode.svg", - "light_mode.svg", + "light_mode.svg", "dark_mode.svg", "settings.svg" ] - + for file in files_to_sync: src_path = web_dir / file dest_path = extension_dir / file - + dest_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src_path, dest_path) - + for svg_file in svg_files: src_path = web_dir / "src" / svg_file dest_path = extension_dir / "web" / "src" / svg_file dest_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src_path, dest_path) - + if __name__ == "__main__": - sync_extension_files() \ No newline at end of file + sync_extension_files() diff --git a/test_backend_offline.py b/test_backend_offline.py index 48ebba9..75af927 100644 --- a/test_backend_offline.py +++ b/test_backend_offline.py @@ -36,8 +36,8 @@ import logging import sys import time import urllib.request +from dataclasses import asdict, dataclass, field from pathlib import Path -from dataclasses import dataclass, asdict, field from typing import List, Optional import numpy as np @@ -157,6 +157,7 @@ def create_engine( ): """Create a TranscriptionEngine with the given backend config.""" import gc + from whisperlivekit.core import TranscriptionEngine # Reset singleton so we get a fresh instance @@ -320,7 +321,7 @@ async def run_test( transcription = _extract_text_from_response(last) # --- Compute WER and timestamp accuracy against ground truth --- - from whisperlivekit.metrics import compute_wer, compute_timestamp_accuracy + from whisperlivekit.metrics import compute_timestamp_accuracy, compute_wer wer_val = None wer_details = None @@ -434,7 +435,7 @@ async def run_all_tests( file_lan = lan if "french" in audio_path.name.lower() and lan == "en": file_lan = "fr" - logger.info(f"Auto-detected language 'fr' from filename") + logger.info("Auto-detected language 'fr' from filename") audio = load_audio(str(audio_path)) @@ -495,7 +496,7 @@ def print_benchmark_summary(results: List[TestResult]): print(f"{'=' * 110}") # Print transcription excerpts - print(f"\nTRANSCRIPTIONS:") + print("\nTRANSCRIPTIONS:") print(f"{'-' * 110}") for r in results: excerpt = r.transcription[:120] + "..." if len(r.transcription) > 120 else r.transcription