Refactor timed objects and data structures

This commit is contained in:
Quentin Fuxa 2026-01-11 16:08:00 +01:00
parent 83362c89c4
commit e144abbbc7
4 changed files with 25 additions and 20 deletions

View file

@ -6,7 +6,7 @@ text normalization, and word-level timestamp accuracy metrics with greedy alignm
import re
import unicodedata
from typing import Dict, List, Optional
from typing import Dict, List
def normalize_text(text: str) -> str:

View file

@ -78,7 +78,6 @@ class SessionMetrics:
def log_summary(self) -> None:
"""Emit a structured log line summarising the session."""
self.total_processing_time_s = sum(self.transcription_durations)
d = self.to_dict()
d["session_elapsed_s"] = round(time.time() - self.session_start, 3) if self.session_start else 0
logger.info(f"SESSION_METRICS {d}")

View file

@ -20,8 +20,8 @@ Usage:
export WHISPERLIVEKIT_LOCK_TIMEOUT=60
"""
import os
import logging
import os
import threading
logger = logging.getLogger(__name__)

View file

@ -1,12 +1,18 @@
from dataclasses import dataclass, field
from datetime import timedelta
from typing import Any, Dict, List, Optional, Union
# Sentence-ending punctuation: ASCII marks plus their full-width (CJK)
# counterparts. NOTE(review): the three non-ASCII members were stripped to
# empty strings in this view; restored as the full-width equivalents of
# '.', '!' and '?' -- confirm against the repository file.
PUNCTUATION_MARKS = {'.', '!', '?', '。', '！', '？'}
def format_time(seconds: float) -> str:
    """Format a duration in seconds as H:MM:SS.cc (centisecond precision).

    The value is rounded to the nearest centisecond before being split into
    fields, so a value such as 59.999 carries over into the minutes field
    instead of producing an out-of-range seconds field.

    Args:
        seconds: Duration in seconds. Negative values are rendered with a
            leading '-' followed by the formatted magnitude.

    Returns:
        A string of the form "H:MM:SS.cc". The hours field is not
        zero-padded and is not capped at 24.
    """
    # Work on the magnitude so divmod never floors a negative value into
    # misleading fields (e.g. -0.5 s would otherwise become "-1:59:59.50").
    total_cs = int(round(abs(seconds) * 100))
    # Suppress the sign when the value rounds to zero, avoiding "-0:00:00.00".
    sign = "-" if seconds < 0 and total_cs else ""
    total_s, cs = divmod(total_cs, 100)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{sign}{h}:{m:02d}:{s:02d}.{cs:02d}"
@dataclass
class Timed:
@ -227,4 +233,4 @@ class State():
new_translation: List[Any] = field(default_factory=list)
new_diarization: List[Any] = field(default_factory=list)
new_tokens_buffer: List[Any] = field(default_factory=list) # only when local agreement
new_translation_buffer= TimedText()
new_translation_buffer: TimedText = field(default_factory=TimedText)