raise Exception when language == auto and task == translation
This commit is contained in:
parent
583a2ec2e4
commit
4a5d5e1f3b
3 changed files with 15 additions and 10 deletions
|
|
@ -120,7 +120,7 @@ class AudioProcessor:
|
|||
async def add_dummy_token(self):
|
||||
"""Placeholder token when no transcription is available."""
|
||||
async with self.lock:
|
||||
current_time = time() - self.beg_loop
|
||||
current_time = time() - self.beg_loop if self.beg_loop else 0
|
||||
self.tokens.append(ASRToken(
|
||||
start=current_time, end=current_time + 1,
|
||||
text=".", speaker=-1, is_dummy=True
|
||||
|
|
@ -295,7 +295,7 @@ class AudioProcessor:
|
|||
|
||||
if type(item) is Silence:
|
||||
cumulative_pcm_duration_stream_time += item.duration
|
||||
self.online.insert_silence(item.duration, self.tokens[-1].end)
|
||||
self.online.insert_silence(item.duration, self.tokens[-1].end if self.tokens else 0)
|
||||
continue
|
||||
|
||||
if isinstance(item, np.ndarray):
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ def ends_with_silence(tokens, buffer_transcription, buffer_diarization, current_
|
|||
if not tokens:
|
||||
return [], buffer_transcription, buffer_diarization
|
||||
last_token = tokens[-1]
|
||||
if tokens and (
|
||||
if tokens and current_time and (
|
||||
current_time - last_token.end >= END_SILENCE_DURATION
|
||||
or
|
||||
(current_time - last_token.end >= 3 and vac_detected_silence)
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ class SimulStreamingOnlineProcessor:
|
|||
self.committed: List[ASRToken] = []
|
||||
self.last_result_tokens: List[ASRToken] = []
|
||||
self.load_new_backend()
|
||||
|
||||
#can be moved
|
||||
if asr.tokenizer:
|
||||
self.model.tokenizer = asr.tokenizer
|
||||
|
||||
|
|
@ -249,11 +251,6 @@ class SimulStreamingASR():
|
|||
}
|
||||
self.model_path = model_mapping.get(modelsize, f'./{modelsize}.pt')
|
||||
|
||||
# Set up tokenizer for translation if needed
|
||||
if self.task == "translate":
|
||||
self.tokenizer = self.set_translate_task()
|
||||
else:
|
||||
self.tokenizer = None
|
||||
self.cfg = AlignAttConfig(
|
||||
model_path=self.model_path,
|
||||
segment_length=self.segment_length,
|
||||
|
|
@ -271,6 +268,12 @@ class SimulStreamingASR():
|
|||
static_init_prompt=self.static_init_prompt,
|
||||
)
|
||||
|
||||
# Set up tokenizer for translation if needed
|
||||
if self.task == "translate":
|
||||
self.tokenizer = self.set_translate_task()
|
||||
else:
|
||||
self.tokenizer = None
|
||||
|
||||
self.model_name = os.path.basename(self.cfg.model_path).replace(".pt", "")
|
||||
self.model_path = os.path.dirname(os.path.abspath(self.cfg.model_path))
|
||||
self.models = [self.load_model() for i in range(self.preload_model_count)]
|
||||
|
|
@ -301,10 +304,12 @@ class SimulStreamingASR():
|
|||
|
||||
def set_translate_task(self):
|
||||
"""Set up translation task."""
|
||||
if self.cfg.language == 'auto':
|
||||
raise Exception('Translation cannot be done with language = auto')
|
||||
return tokenizer.get_tokenizer(
|
||||
multilingual=True,
|
||||
language=self.model.cfg.language,
|
||||
num_languages=self.model.model.num_languages,
|
||||
language=self.cfg.language,
|
||||
num_languages=99,
|
||||
task="translate"
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue