improve diarization + silence handling
This commit is contained in:
parent
b101ce06bd
commit
9a8d3cbd90
1 changed files with 7 additions and 1 deletions
|
|
@ -125,7 +125,13 @@ def format_output(state, silence, current_time, diarization, debug):
|
|||
# should become:
|
||||
# Are you okay? |SPLIT SPEAKER| yeah, sure. Absolutely
|
||||
append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized)
|
||||
continue
|
||||
continue
|
||||
elif speaker == -2 or previous_speaker == -2: #silences can happen anytime
|
||||
lines.append(new_line(token, speaker, last_end_diarized, debug_info = ""))
|
||||
continue
|
||||
else: # creating a new speaker line here is not ideal: we are not sure about the split, so we prefer to append to the previous line
|
||||
# lines.append(new_line(token, speaker, last_end_diarized, debug_info = ""))
|
||||
pass
|
||||
|
||||
append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized)
|
||||
return lines, undiarized_text, buffer_transcription, ''
|
||||
|
|
|
|||
Loading…
Reference in a new issue