Merge pull request #52 from QuentinFuxa/diart_integration_improvements

Diart integration improvements
This commit is contained in:
Quentin Fuxa 2025-02-19 14:43:39 +01:00 committed by GitHub
commit 97c0ae6154
3 changed files with 85 additions and 28 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 174 KiB

After

Width:  |  Height:  |  Size: 155 KiB

View file

@ -78,12 +78,59 @@
/* Dark grey colour for <strong> text inside the #linesTranscript container. */
#linesTranscript strong {
color: #333;
}
/* Speaker label: small light-blue rounded pill.
   The render script emits <span id="speaker"> for every transcript line that
   carries a speaker label.
   NOTE(review): that repeats the same id many times in one document; ids are
   meant to be unique, so a class selector is probably the better fit - confirm. */
#speaker {
background-color: #dcefff;
border-radius: 30px;
padding: 2px 10px;
font-size: 14px;
}
/* Grey time range / status text placed after a label, with a 10px gap.
   The render script nests <span id='timeInfo'> inside the speaker, silence and
   loading spans.
   NOTE(review): the id is emitted once per transcript line, so it is not unique
   in the document - a class is probably intended; confirm. */
#timeInfo {
color: #666;
margin-left: 10px;
}
/* Transcript text of one line (the render script wraps it in
   <div class='textcontent'>): indented, with a light-blue rule on the left in
   the same #dcefff used for the speaker pill, and spacing below. */
.textcontent {
font-size: 16px;
margin-left: 10px;
padding-left: 10px;
border-left: 2px solid #dcefff;
margin-bottom: 10px;
}
/* Grey buffer styling: light grey italic for the buffer text that the render
   script appends, as <span class="buffer">, to the last transcript line. */
.buffer {
color: rgb(180, 180, 180);
font-style: italic;
margin-left: 4px;
}
/* Small circular loading indicator: a faint ring whose top edge is darker,
   rotated continuously by the "spin" keyframes (0.6s per turn, constant speed). */
.spinner {
display: inline-block;
width: 8px;
height: 8px;
/* Faint ring all round ... */
border: 2px solid rgba(0, 0, 0, 0.2);
/* ... with a dark top edge; must come after `border` so it overrides it. */
border-top: 2px solid #333;
border-radius: 50%;
animation: spin 0.6s linear infinite;
vertical-align: middle;
margin-bottom: 2px;
}
/* One full clockwise turn; only the end state is given, so the animation
   starts from the element's own (unrotated) transform. Used by .spinner. */
@keyframes spin {
to {
transform: rotate(360deg);
}
}
/* Grey rounded pill for the "Silence" label, which the render script emits
   when a line's speaker value is -2. */
.silence {
color: #666;
background-color: #f3f3f3;
font-size: 13px;
border-radius: 30px;
padding: 2px 10px;
}
/* Pale-blue rounded pill for the "undergoing diarization" status, which the
   render script emits (together with a .spinner) when a line's speaker value
   is -1. */
.loading {
color: #666;
background-color: #eff9ff;
font-size: 14px;
border-radius: 30px;
padding: 2px 10px;
}
</style>
</head>
<body>
@ -188,7 +235,7 @@
}
*/
const { lines = [], buffer = "" } = data;
renderLinesWithBuffer(lines, buffer);
renderLinesWithBuffer( lines, buffer);
};
});
}
@ -199,27 +246,36 @@
linesTranscriptDiv.innerHTML = "";
return;
}
const linesHtml = lines.map((item, idx) => {
let speakerLabel = "";
if (item.speaker === -2) {
speakerLabel = "No speaker";
} else if (item.speaker !== -1) {
speakerLabel = `Speaker ${item.speaker}`;
}
let timeInfo = "";
if (item.beg !== undefined && item.end !== undefined) {
timeInfo = ` [${item.beg}, ${item.end}]`;
timeInfo = ` ${item.beg} - ${item.end}`;
}
let speakerLabel = "";
if (item.speaker === -2) {
speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
} else if (item.speaker == -1) {
speakerLabel = `<span class='loading'> <span class="spinner"></span><span id='timeInfo'>${item.diff} second(s) of audio are undergoing diarization</span></span>`;
} else if (item.speaker == -3) {
speakerLabel = `<span id="speaker"><span id='timeInfo'>${timeInfo}</span>`;
} else if (item.speaker !== -1) {
speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
}
let textContent = item.text;
if (idx === lines.length - 1 && buffer) {
textContent += `<span class="buffer">${buffer}</span>`;
}
return speakerLabel
? `<p><strong>${speakerLabel}${timeInfo}</strong> ${textContent}</p>`
: `<p>${textContent}</p>`;
return textContent
? `<p>${speakerLabel}<br/><div class='textcontent'>${textContent}</div></p>`
: `<p >${speakerLabel}<br/></p>`;
}).join("");
linesTranscriptDiv.innerHTML = linesHtml;

View file

@ -214,10 +214,10 @@ async def websocket_endpoint(websocket: WebSocket):
else:
chunk_history.append({
"beg": time() - beg_loop,
"end": time() - beg_loop + 0.1,
"end": time() - beg_loop + 1,
"text": '',
})
sleep(0.1)
sleep(1)
buffer = ''
if args.diarization:
@ -225,28 +225,29 @@ async def websocket_endpoint(websocket: WebSocket):
diarization.assign_speakers_to_chunks(chunk_history)
current_speaker = -1
lines = [{
"beg": 0,
"end": 0,
"speaker": current_speaker,
"text": ""
}]
for ch in chunk_history:
if args.diarization and ch["speaker"] and ch["speaker"] != current_speaker:
new_speaker = ch["speaker"]
current_speaker = 0
lines = []
last_end_diarized = 0
for ind, ch in enumerate(chunk_history):
speaker = ch.get("speaker", -3)
if speaker == -1 and ind < len(chunk_history) - 1:
continue
elif speaker != current_speaker:
lines.append(
{
"speaker": new_speaker,
"speaker": speaker,
"text": ch['text'],
"beg": format_time(ch['beg']),
"end": format_time(ch['end']),
"diff": round(ch['end'] - last_end_diarized, 2)
}
)
current_speaker = new_speaker
else:
current_speaker = speaker
elif speaker != -1:
lines[-1]["text"] += ch['text']
lines[-1]["end"] = format_time(ch['end'])
if speaker != -1:
last_end_diarized = max(ch['end'], last_end_diarized)
response = {"lines": lines, "buffer": buffer}
await websocket.send_json(response)