Refactor timed objects and data structures
This commit is contained in:
parent
83362c89c4
commit
e144abbbc7
4 changed files with 25 additions and 20 deletions
|
|
@@ -6,7 +6,7 @@ text normalization, and word-level timestamp accuracy metrics with greedy alignm
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
||||||
def normalize_text(text: str) -> str:
|
def normalize_text(text: str) -> str:
|
||||||
|
|
|
||||||
|
|
@@ -78,7 +78,6 @@ class SessionMetrics:
|
||||||
|
|
||||||
def log_summary(self) -> None:
|
def log_summary(self) -> None:
|
||||||
"""Emit a structured log line summarising the session."""
|
"""Emit a structured log line summarising the session."""
|
||||||
self.total_processing_time_s = sum(self.transcription_durations)
|
|
||||||
d = self.to_dict()
|
d = self.to_dict()
|
||||||
d["session_elapsed_s"] = round(time.time() - self.session_start, 3) if self.session_start else 0
|
d["session_elapsed_s"] = round(time.time() - self.session_start, 3) if self.session_start else 0
|
||||||
logger.info(f"SESSION_METRICS {d}")
|
logger.info(f"SESSION_METRICS {d}")
|
||||||
|
|
|
||||||
|
|
@@ -20,8 +20,8 @@ Usage:
|
||||||
export WHISPERLIVEKIT_LOCK_TIMEOUT=60
|
export WHISPERLIVEKIT_LOCK_TIMEOUT=60
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
|
||||||
|
|
@@ -1,12 +1,18 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import timedelta
|
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'}
|
PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'}
|
||||||
|
|
||||||
def format_time(seconds: float) -> str:
|
def format_time(seconds: float) -> str:
|
||||||
"""Format seconds as HH:MM:SS."""
|
"""Format seconds as H:MM:SS.cc (centisecond precision)."""
|
||||||
return str(timedelta(seconds=int(seconds)))
|
total_cs = int(round(seconds * 100))
|
||||||
|
cs = total_cs % 100
|
||||||
|
total_s = total_cs // 100
|
||||||
|
s = total_s % 60
|
||||||
|
total_m = total_s // 60
|
||||||
|
m = total_m % 60
|
||||||
|
h = total_m // 60
|
||||||
|
return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Timed:
|
class Timed:
|
||||||
|
|
@@ -227,4 +233,4 @@ class State():
|
||||||
new_translation: List[Any] = field(default_factory=list)
|
new_translation: List[Any] = field(default_factory=list)
|
||||||
new_diarization: List[Any] = field(default_factory=list)
|
new_diarization: List[Any] = field(default_factory=list)
|
||||||
new_tokens_buffer: List[Any] = field(default_factory=list) # only when local agreement
|
new_tokens_buffer: List[Any] = field(default_factory=list) # only when local agreement
|
||||||
new_translation_buffer= TimedText()
|
new_translation_buffer: TimedText = field(default_factory=TimedText)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue