From 99dc96c644b32021b6fb31b20976d1ab8d75928c Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 14 Sep 2025 17:03:00 +0200 Subject: [PATCH 1/3] fixes #224 --- whisperlivekit/audio_processor.py | 28 +++++++------- whisperlivekit/remove_silences.py | 2 +- whisperlivekit/results_formater.py | 41 ++++++++++++++------ whisperlivekit/timed_objects.py | 47 ++++++++++++++++++++++- whisperlivekit/translation/translation.py | 10 ++++- whisperlivekit/web/live_transcription.css | 1 - 6 files changed, 100 insertions(+), 29 deletions(-) diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py index 525c4fc..01c0c17 100644 --- a/whisperlivekit/audio_processor.py +++ b/whisperlivekit/audio_processor.py @@ -257,12 +257,11 @@ class AudioProcessor: asr_processing_logs += f" + Silence of = {item.duration:.2f}s" if self.tokens: asr_processing_logs += f" | last_end = {self.tokens[-1].end} |" - logger.info(asr_processing_logs) - - if type(item) is Silence: + logger.info(asr_processing_logs) cumulative_pcm_duration_stream_time += item.duration self.online.insert_silence(item.duration, self.tokens[-1].end if self.tokens else 0) continue + logger.info(asr_processing_logs) if isinstance(item, np.ndarray): pcm_array = item @@ -301,7 +300,7 @@ class AudioProcessor: new_tokens, buffer_text, new_end_buffer ) - if new_tokens and self.args.target_language and self.translation_queue: + if self.translation_queue: for token in new_tokens: await self.translation_queue.put(token) @@ -326,13 +325,11 @@ class AudioProcessor: logger.debug("Diarization processor received sentinel. Finishing.") self.diarization_queue.task_done() break - - if type(item) is Silence: + elif type(item) is Silence: cumulative_pcm_duration_stream_time += item.duration diarization_obj.insert_silence(item.duration) continue - - if isinstance(item, np.ndarray): + elif isinstance(item, np.ndarray): pcm_array = item else: raise Exception('item should be pcm_array') @@ -365,14 +362,17 @@ class AudioProcessor: # in the future we want to have different languages for each speaker etc, so it will be more complex. while True: try: - token = await self.translation_queue.get() #block until at least 1 token - if token is SENTINEL: + item = await self.translation_queue.get() #block until at least 1 token + if item is SENTINEL: logger.debug("Translation processor received sentinel. Finishing.") self.translation_queue.task_done() break + elif type(item) is Silence: + online_translation.insert_silence(item.duration) + continue # get all the available tokens for translation. The more words, the more precise - tokens_to_process = [token] + tokens_to_process = [item] additional_tokens = await get_all_from_queue(self.translation_queue) sentinel_found = False @@ -396,7 +396,7 @@ class AudioProcessor: except Exception as e: logger.warning(f"Exception in translation_processor: {e}") logger.warning(f"Traceback: {traceback.format_exc()}") - if 'token' in locals() and token is not SENTINEL: + if 'token' in locals() and item is not SENTINEL: self.translation_queue.task_done() if 'additional_tokens' in locals(): for _ in additional_tokens: @@ -446,7 +446,7 @@ class AudioProcessor: if not state.tokens and not buffer_transcription and not buffer_diarization: response_status = "no_audio_detected" lines = [] - elif response_status == "active_transcription" and not lines: + elif not lines: lines = [Line( speaker=1, start=state.get("end_buffer", 0), @@ -638,6 +638,8 @@ class AudioProcessor: await self.transcription_queue.put(silence_buffer) if self.args.diarization and self.diarization_queue: await self.diarization_queue.put(silence_buffer) + if self.translation_queue: + await self.translation_queue.put(silence_buffer) if not self.silence: if self.args.transcription and self.transcription_queue: diff --git a/whisperlivekit/remove_silences.py b/whisperlivekit/remove_silences.py index dc207fc..3e4edb1 100644 --- a/whisperlivekit/remove_silences.py +++ b/whisperlivekit/remove_silences.py @@ -39,7 +39,7 @@ def blank_to_silence(tokens): ) else: if silence_token: #there was silence but no more - if silence_token.end - silence_token.start >= MIN_SILENCE_DURATION: + if silence_token.duration() >= MIN_SILENCE_DURATION: cleaned_tokens.append( silence_token ) diff --git a/whisperlivekit/results_formater.py b/whisperlivekit/results_formater.py index 1526ef1..1556ac9 100644 --- a/whisperlivekit/results_formater.py +++ b/whisperlivekit/results_formater.py @@ -123,14 +123,33 @@ def format_output(state, silence, current_time, args, debug, sep): append_token_to_last_line(lines, sep, token, debug_info) if lines and translated_segments: - cts_idx = 0 # current_translated_segment_idx - for line in lines: - while cts_idx < len(translated_segments): - ts = translated_segments[cts_idx] - if ts and ts.start and ts.start >= line.start and ts.end <= line.end: - line.translation += ts.text + ' ' - cts_idx += 1 - else: - break - return lines, undiarized_text, buffer_transcription, '' - + unassigned_translated_segments = [] + for ts in translated_segments: + assigned = False + for line in lines: + if ts and ts.overlaps_with(line): + if ts.is_within(line): + line.translation += ts.text + ' ' + assigned = True + break + else: + ts0, ts1 = ts.approximate_cut_at(line.end) + if ts0 and line.overlaps_with(ts0): + line.translation += ts0.text + ' ' + if ts1: + unassigned_translated_segments.append(ts1) + assigned = True + break + if not assigned: + unassigned_translated_segments.append(ts) + + if unassigned_translated_segments: + for line in lines: + remaining_segments = [] + for ts in unassigned_translated_segments: + if ts and ts.overlaps_with(line): + line.translation += ts.text + ' ' + else: + remaining_segments.append(ts) + unassigned_translated_segments = remaining_segments #maybe do smth in the future about that + return lines, undiarized_text, buffer_transcription, '' diff --git a/whisperlivekit/timed_objects.py b/whisperlivekit/timed_objects.py index 3acf7c8..a9df490 100644 --- a/whisperlivekit/timed_objects.py +++ b/whisperlivekit/timed_objects.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Optional +from typing import Optional, Any from datetime import timedelta def format_time(seconds: float) -> str: @@ -15,6 +15,21 @@ class TimedText: speaker: Optional[int] = -1 probability: Optional[float] = None is_dummy: Optional[bool] = False + + def overlaps_with(self, other: 'TimedText') -> bool: + return not (self.end <= other.start or other.end <= self.start) + + def is_within(self, other: 'TimedText') -> bool: + return other.contains_timespan(self) + + def duration(self) -> float: + return self.end - self.start + + def contains_time(self, time: float) -> bool: + return self.start <= time <= self.end + + def contains_timespan(self, other: 'TimedText') -> bool: + return self.start <= other.start and self.end >= other.end @dataclass class ASRToken(TimedText): @@ -41,6 +56,34 @@ class SpeakerSegment(TimedText): class Translation(TimedText): pass + def approximate_cut_at(self, cut_time): + """ + Each word in text is considered to be of duration (end-start)/len(words in text) + """ + if not self.text or not self.contains_time(cut_time): + return self, None + + words = self.text.split() + num_words = len(words) + if num_words == 0: + return self, None + + duration_per_word = self.duration() / num_words + + cut_word_index = int((cut_time - self.start) / duration_per_word) + + if cut_word_index >= num_words: + cut_word_index = num_words -1 + + text0 = " ".join(words[:cut_word_index]) + text1 = " ".join(words[cut_word_index:]) + + segment0 = Translation(start=self.start, end=cut_time, text=text0) + segment1 = Translation(start=cut_time, end=self.end, text=text1) + + return segment0, segment1 + + @dataclass class Silence(): duration: float @@ -91,4 +134,4 @@ class State(): end_buffer: float end_attributed_speaker: float remaining_time_transcription: float - remaining_time_diarization: float \ No newline at end of file + remaining_time_diarization: float diff --git a/whisperlivekit/translation/translation.py b/whisperlivekit/translation/translation.py index a28f2fa..88bb5e2 100644 --- a/whisperlivekit/translation/translation.py +++ b/whisperlivekit/translation/translation.py @@ -1,3 +1,4 @@ +import logging import ctranslate2 import torch import transformers @@ -6,11 +7,14 @@ import huggingface_hub from whisperlivekit.translation.mapping_languages import get_nllb_code from whisperlivekit.timed_objects import Translation +logger = logging.getLogger(__name__) #In diarization case, we may want to translate just one speaker, or at least start the sentences there PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'} +MIN_SILENCE_DURATION_DEL_BUFFER = 3 #After a silence of x seconds, we consider the model should not use the buffer, even if the previous +# sentence is not finished. @dataclass class TranslationModel(): @@ -109,7 +113,11 @@ class OnlineTranslation: self.translation_remaining = self.translate_tokens(self.buffer) self.len_processed_buffer = len(self.buffer) return self.validated + [self.translation_remaining] - + + def insert_silence(self, silence_duration: float): + if silence_duration >= MIN_SILENCE_DURATION_DEL_BUFFER: + self.buffer = [] + self.validated += [self.translation_remaining] if __name__ == '__main__': output_lang = 'fr' diff --git a/whisperlivekit/web/live_transcription.css b/whisperlivekit/web/live_transcription.css index 422d156..3cf5007 100644 --- a/whisperlivekit/web/live_transcription.css +++ b/whisperlivekit/web/live_transcription.css @@ -438,7 +438,6 @@ label { font-size: 13px; border-radius: 30px; padding: 2px 10px; - display: none; } .loading { From bbba1d9bb74600a719ac15528ae7108630f92fe3 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Tue, 16 Sep 2025 20:45:01 +0200 Subject: [PATCH 2/3] add nllb-backend and translation perf test in dev_notes --- DEV_NOTES.md | 25 +++++++++++++-- README.md | 4 +++ whisperlivekit/core.py | 10 ++++-- whisperlivekit/parse_args.py | 7 +++++ whisperlivekit/translation/translation.py | 37 +++++++++++++++-------- 5 files changed, 66 insertions(+), 17 deletions(-) diff --git a/DEV_NOTES.md b/DEV_NOTES.md index c41016f..f9c3c4a 100644 --- a/DEV_NOTES.md +++ b/DEV_NOTES.md @@ -18,8 +18,29 @@ Decoder weights: 59110771 bytes Encoder weights: 15268874 bytes +# 2. Translation: Faster model for each system -# 2. SortFormer Diarization: 4-to-2 Speaker Constraint Algorithm +## Benchmark Results + +Testing on MacBook M3 with NLLB-200-distilled-600M model: + +### Standard Transformers vs CTranslate2 + +| Test Text | Standard Inference Time | CTranslate2 Inference Time | Speedup | +|-----------|-------------------------|---------------------------|---------| +| UN Chief says there is no military solution in Syria | 0.9395s | 2.0472s | 0.5x | +| The rapid advancement of AI technology is transforming various industries | 0.7171s | 1.7516s | 0.4x | +| Climate change poses a significant threat to global ecosystems | 0.8533s | 1.8323s | 0.5x | +| International cooperation is essential for addressing global challenges | 0.7209s | 1.3575s | 0.5x | +| The development of renewable energy sources is crucial for a sustainable future | 0.8760s | 1.5589s | 0.6x | + +**Results:** +- Total Standard time: 4.1068s +- Total CTranslate2 time: 8.5476s +- CTranslate2 is slower on this system --> Use Transformers, and ideally we would have an mlx implementation. + + +# 3. SortFormer Diarization: 4-to-2 Speaker Constraint Algorithm Transform a diarization model that predicts up to 4 speakers into one that predicts up to 2 speakers by mapping the output predictions. @@ -67,4 +88,4 @@ ELSE: AS_2 ← B to finish -``` \ No newline at end of file +``` diff --git a/README.md b/README.md index 2d0cb83..2c267a7 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,10 @@ An important list of parameters can be changed. But what *should* you change? | `--embedding-model` | Hugging Face model ID for Diart embedding model. [Available models](https://github.com/juanmc2005/diart/tree/main?tab=readme-ov-file#pre-trained-models) | `speechbrain/spkrec-ecapa-voxceleb` | +| Translation options | Description | Default | +|-----------|-------------|---------| +| `--nllb-backend` | [NOT FUNCTIONNAL YET] transformer or ctranslate2 | `ctranslate2` | + > For diarization using Diart, you need access to pyannote.audio models: > 1. [Accept user conditions](https://huggingface.co/pyannote/segmentation) for the `pyannote/segmentation` model > 2. [Accept user conditions](https://huggingface.co/pyannote/segmentation-3.0) for the `pyannote/segmentation-3.0` model diff --git a/whisperlivekit/core.py b/whisperlivekit/core.py index fd290d5..7f2eaf4 100644 --- a/whisperlivekit/core.py +++ b/whisperlivekit/core.py @@ -43,10 +43,12 @@ class TranscriptionEngine: "transcription": True, "vad": True, "pcm_input": False, + # whisperstreaming params: "buffer_trimming": "segment", "confidence_validation": False, "buffer_trimming_sec": 15, + # simulstreaming params: "disable_fast_encoder": False, "frame_threshold": 25, @@ -61,10 +63,14 @@ class TranscriptionEngine: "max_context_tokens": None, "model_path": './base.pt', "diarization_backend": "sortformer", + # diarization params: "disable_punctuation_split" : False, "segmentation_model": "pyannote/segmentation-3.0", - "embedding_model": "pyannote/embedding", + "embedding_model": "pyannote/embedding", + + # translation params: + "nllb_backend": "ctranslate2" } config_dict = {**defaults, **kwargs} @@ -142,7 +148,7 @@ class TranscriptionEngine: raise Exception('Translation cannot be set with language auto') else: from whisperlivekit.translation.translation import load_model - self.translation_model = load_model([self.args.lan]) #in the future we want to handle different languages for different speakers + self.translation_model = load_model([self.args.lan], backend=self.args.nllb_backend) #in the future we want to handle different languages for different speakers TranscriptionEngine._initialized = True diff --git a/whisperlivekit/parse_args.py b/whisperlivekit/parse_args.py index 3ef74bf..c73e6d4 100644 --- a/whisperlivekit/parse_args.py +++ b/whisperlivekit/parse_args.py @@ -287,6 +287,13 @@ def parse_args(): help="Optional. Number of models to preload in memory to speed up loading (set up to the expected number of concurrent instances).", ) + simulstreaming_group.add_argument( + "--nllb-backend", + type=str, + default="ctranslate2", + help="transformer or ctranslate2", + ) + args = parser.parse_args() args.transcription = not args.no_transcription diff --git a/whisperlivekit/translation/translation.py b/whisperlivekit/translation/translation.py index 88bb5e2..7923243 100644 --- a/whisperlivekit/translation/translation.py +++ b/whisperlivekit/translation/translation.py @@ -21,26 +21,37 @@ class TranslationModel(): translator: ctranslate2.Translator tokenizer: dict -def load_model(src_langs): - MODEL = 'nllb-200-distilled-600M-ctranslate2' - MODEL_GUY = 'entai2965' - huggingface_hub.snapshot_download(MODEL_GUY + '/' + MODEL,local_dir=MODEL) - device = "cuda" if torch.cuda.is_available() else "cpu" - translator = ctranslate2.Translator(MODEL,device=device) - tokenizer = dict() - for src_lang in src_langs: - tokenizer[src_lang] = transformers.AutoTokenizer.from_pretrained(MODEL, src_lang=src_lang, clean_up_tokenization_spaces=True) +def load_model(src_langs, backend='ctranslate2'): + if backend=='ctranslate2': + MODEL = 'nllb-200-distilled-600M-ctranslate2' + MODEL_GUY = 'entai2965' + huggingface_hub.snapshot_download(MODEL_GUY + '/' + MODEL,local_dir=MODEL) + device = "cuda" if torch.cuda.is_available() else "cpu" + translator = ctranslate2.Translator(MODEL,device=device) + tokenizer = dict() + for src_lang in src_langs: + tokenizer[src_lang] = transformers.AutoTokenizer.from_pretrained(MODEL, src_lang=src_lang, clean_up_tokenization_spaces=True) + elif backend=='transformers': + raise Exception('not implemented yet') return TranslationModel( translator=translator, tokenizer=tokenizer ) -def translate(input, translation_model, tgt_lang): - source = translation_model.tokenizer.convert_ids_to_tokens(translation_model.tokenizer.encode(input)) +def translate(input, translation_model, tgt_lang, src_lang="en"): + # Get the specific tokenizer for the source language + tokenizer = translation_model.tokenizer[src_lang] + + # Convert input to tokens + source = tokenizer.convert_ids_to_tokens(tokenizer.encode(input)) + + # Translate with target language prefix target_prefix = [tgt_lang] results = translation_model.translator.translate_batch([source], target_prefix=[target_prefix]) + + # Get translated tokens and decode target = results[0].hypotheses[0][1:] - return translation_model.tokenizer.decode(translation_model.tokenizer.convert_tokens_to_ids(target)) + return tokenizer.decode(tokenizer.convert_tokens_to_ids(target)) class OnlineTranslation: def __init__(self, translation_model: TranslationModel, input_languages: list, output_languages: list): @@ -142,4 +153,4 @@ if __name__ == '__main__': - # print(result) \ No newline at end of file + # print(result) From 65025cc448880250d2be7cd92cef98e4f247b9b3 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Tue, 16 Sep 2025 23:30:00 +0200 Subject: [PATCH 3/3] nllb backend can be transformers, and model size can be 1.3B --- README.md | 3 +- whisperlivekit/core.py | 6 +- whisperlivekit/parse_args.py | 9 ++- whisperlivekit/translation/translation.py | 68 +++++++++++------------ 4 files changed, 44 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 2c267a7..656b5fb 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,8 @@ An important list of parameters can be changed. But what *should* you change? | Translation options | Description | Default | |-----------|-------------|---------| -| `--nllb-backend` | [NOT FUNCTIONNAL YET] transformer or ctranslate2 | `ctranslate2` | +| `--nllb-backend` | `transformers` or `ctranslate2` | `ctranslate2` | +| `--nllb-size` | `600M` or `1.3B` | `600M` | > For diarization using Diart, you need access to pyannote.audio models: > 1. [Accept user conditions](https://huggingface.co/pyannote/segmentation) for the `pyannote/segmentation` model diff --git a/whisperlivekit/core.py b/whisperlivekit/core.py index 7f2eaf4..578e624 100644 --- a/whisperlivekit/core.py +++ b/whisperlivekit/core.py @@ -70,7 +70,8 @@ class TranscriptionEngine: "embedding_model": "pyannote/embedding", # translation params: - "nllb_backend": "ctranslate2" + "nllb_backend": "ctranslate2", + "nllb_size": "600M" } config_dict = {**defaults, **kwargs} @@ -148,8 +149,7 @@ class TranscriptionEngine: raise Exception('Translation cannot be set with language auto') else: from whisperlivekit.translation.translation import load_model - self.translation_model = load_model([self.args.lan], backend=self.args.nllb_backend) #in the future we want to handle different languages for different speakers - + self.translation_model = load_model([self.args.lan], backend=self.args.nllb_backend, model_size=self.args.nllb_size) #in the future we want to handle different languages for different speakers TranscriptionEngine._initialized = True diff --git a/whisperlivekit/parse_args.py b/whisperlivekit/parse_args.py index c73e6d4..55d4173 100644 --- a/whisperlivekit/parse_args.py +++ b/whisperlivekit/parse_args.py @@ -291,7 +291,14 @@ def parse_args(): "--nllb-backend", type=str, default="ctranslate2", - help="transformer or ctranslate2", + help="transformers or ctranslate2", + ) + + simulstreaming_group.add_argument( + "--nllb-size", + type=str, + default="600M", + help="600M or 1.3B", ) args = parser.parse_args() diff --git a/whisperlivekit/translation/translation.py b/whisperlivekit/translation/translation.py index 7923243..c08f190 100644 --- a/whisperlivekit/translation/translation.py +++ b/whisperlivekit/translation/translation.py @@ -1,4 +1,5 @@ import logging +import time import ctranslate2 import torch import transformers @@ -20,39 +21,29 @@ MIN_SILENCE_DURATION_DEL_BUFFER = 3 #After a silence of x seconds, we consider t class TranslationModel(): translator: ctranslate2.Translator tokenizer: dict + device: str + backend_type: str = 'ctranslate2' -def load_model(src_langs, backend='ctranslate2'): +def load_model(src_langs, backend='ctranslate2', model_size='600M'): + device = "cuda" if torch.cuda.is_available() else "cpu" + MODEL = f'nllb-200-distilled-{model_size}-ctranslate2' if backend=='ctranslate2': - MODEL = 'nllb-200-distilled-600M-ctranslate2' MODEL_GUY = 'entai2965' huggingface_hub.snapshot_download(MODEL_GUY + '/' + MODEL,local_dir=MODEL) - device = "cuda" if torch.cuda.is_available() else "cpu" translator = ctranslate2.Translator(MODEL,device=device) - tokenizer = dict() - for src_lang in src_langs: - tokenizer[src_lang] = transformers.AutoTokenizer.from_pretrained(MODEL, src_lang=src_lang, clean_up_tokenization_spaces=True) elif backend=='transformers': - raise Exception('not implemented yet') + translator = transformers.AutoModelForSeq2SeqLM.from_pretrained(f"facebook/nllb-200-distilled-{model_size}") + tokenizer = dict() + for src_lang in src_langs: + tokenizer[src_lang] = transformers.AutoTokenizer.from_pretrained(MODEL, src_lang=src_lang, clean_up_tokenization_spaces=True) + return TranslationModel( translator=translator, - tokenizer=tokenizer + tokenizer=tokenizer, + backend_type=backend, + device = device ) -def translate(input, translation_model, tgt_lang, src_lang="en"): - # Get the specific tokenizer for the source language - tokenizer = translation_model.tokenizer[src_lang] - - # Convert input to tokens - source = tokenizer.convert_ids_to_tokens(tokenizer.encode(input)) - - # Translate with target language prefix - target_prefix = [tgt_lang] - results = translation_model.translator.translate_batch([source], target_prefix=[target_prefix]) - - # Get translated tokens and decode - target = results[0].hypotheses[0][1:] - return tokenizer.decode(tokenizer.convert_tokens_to_ids(target)) - class OnlineTranslation: def __init__(self, translation_model: TranslationModel, input_languages: list, output_languages: list): self.buffer = [] @@ -83,12 +74,19 @@ class OnlineTranslation: output_lang = self.output_languages[0] nllb_output_lang = get_nllb_code(output_lang) - source = self.translation_model.tokenizer[input_lang].convert_ids_to_tokens(self.translation_model.tokenizer[input_lang].encode(input)) - results = self.translation_model.translator.translate_batch([source], target_prefix=[[nllb_output_lang]]) #we can use return_attention=True to try to optimize the stuff. - target = results[0].hypotheses[0][1:] - results = self.translation_model.tokenizer[input_lang].decode(self.translation_model.tokenizer[input_lang].convert_tokens_to_ids(target)) - return results - + tokenizer = self.translation_model.tokenizer[input_lang] + tokenizer_output = tokenizer(input, return_tensors="pt").to(self.translation_model.device) + + if self.translation_model.backend_type == 'ctranslate2': + source = tokenizer.convert_ids_to_tokens(tokenizer_output['input_ids'][0]) + results = self.translation_model.translator.translate_batch([source], target_prefix=[[nllb_output_lang]]) + target = results[0].hypotheses[0][1:] + result = tokenizer.decode(tokenizer.convert_tokens_to_ids(target)) + else: + translated_tokens = self.translation_model.translator.generate(**tokenizer_output, forced_bos_token_id=tokenizer.convert_tokens_to_ids(nllb_output_lang)) + result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0] + return result + def translate_tokens(self, tokens): if tokens: text = ' '.join([token.text for token in tokens]) @@ -103,7 +101,6 @@ class OnlineTranslation: return translation return None - def insert_tokens(self, tokens): self.buffer.extend(tokens) @@ -141,16 +138,13 @@ if __name__ == '__main__': test = test_string.split(' ') step = len(test) // 3 - shared_model = load_model([input_lang]) + shared_model = load_model([input_lang], backend='ctranslate2') online_translation = OnlineTranslation(shared_model, input_languages=[input_lang], output_languages=[output_lang]) - + + beg_inference = time.time() for id in range(5): val = test[id*step : (id+1)*step] val_str = ' '.join(val) result = online_translation.translate(val_str) print(result) - - - - - # print(result) + print('inference time:', time.time() - beg_inference) \ No newline at end of file