diff --git a/whisperlivekit/web/live_transcription.js b/whisperlivekit/web/live_transcription.js index c4db1cd..1fc22b0 100644 --- a/whisperlivekit/web/live_transcription.js +++ b/whisperlivekit/web/live_transcription.js @@ -273,6 +273,13 @@ function setupWebSocket() { return; } + // Ignore diff/snapshot messages — the default frontend uses full-state mode. + // These are only sent when a client explicitly opts in via ?mode=diff. + if (data.type === "diff" || data.type === "snapshot") { + console.warn("Received diff-protocol message but frontend is in full mode; ignoring.", data.type); + return; + } + if (data.type === "ready_to_stop") { console.log("Ready to stop received, finalizing display and closing WebSocket."); waitingForStop = false; @@ -364,7 +371,13 @@ function renderLinesWithBuffer( } lastSignature = signature; - const linesHtml = (lines || []) + // When there are no committed lines yet but buffer text exists (common with + // slow backends like voxtral on MPS), render the buffer as a standalone line. + const effectiveLines = (lines || []).length === 0 && (buffer_transcription || buffer_diarization) + ? [{ speaker: 1, text: "" }] + : (lines || []); + + const linesHtml = effectiveLines .map((item, idx) => { let timeInfo = ""; if (item.start !== undefined && item.end !== undefined) { @@ -389,7 +402,7 @@ function renderLinesWithBuffer( let currentLineText = item.text || ""; - if (idx === lines.length - 1) { + if (idx === effectiveLines.length - 1) { if (!isFinalizing && item.speaker !== -2) { speakerLabel += `Transcription lag ${fmt1( remaining_time_transcription @@ -424,7 +437,7 @@ function renderLinesWithBuffer( if (item.translation) { translationContent += item.translation.trim(); } - if (idx === lines.length - 1 && buffer_translation) { + if (idx === effectiveLines.length - 1 && buffer_translation) { const bufferPiece = isFinalizing ? buffer_translation : `${buffer_translation}`; diff --git a/whisperlivekit/web/web_interface.py b/whisperlivekit/web/web_interface.py index d8e13bc..a3b30ea 100644 --- a/whisperlivekit/web/web_interface.py +++ b/whisperlivekit/web/web_interface.py @@ -17,17 +17,17 @@ def get_inline_ui_html(): """Returns the complete web interface HTML with all assets embedded in a single call.""" try: with resources.files('whisperlivekit.web').joinpath('live_transcription.html').open('r', encoding='utf-8') as f: - html_content = f.read() + html_content = f.read() with resources.files('whisperlivekit.web').joinpath('live_transcription.css').open('r', encoding='utf-8') as f: css_content = f.read() with resources.files('whisperlivekit.web').joinpath('live_transcription.js').open('r', encoding='utf-8') as f: js_content = f.read() - + with resources.files('whisperlivekit.web').joinpath('pcm_worklet.js').open('r', encoding='utf-8') as f: worklet_code = f.read() with resources.files('whisperlivekit.web').joinpath('recorder_worker.js').open('r', encoding='utf-8') as f: worker_code = f.read() - + js_content = js_content.replace( 'await audioContext.audioWorklet.addModule("/web/pcm_worklet.js");', 'const workletBlob = new Blob([`' + worklet_code + '`], { type: "application/javascript" });\n' + @@ -40,7 +40,7 @@ def get_inline_ui_html(): 'const workerUrl = URL.createObjectURL(workerBlob);\n' + 'recorderWorker = new Worker(workerUrl);' ) - + # SVG files with resources.files('whisperlivekit.web').joinpath('src', 'system_mode.svg').open('r', encoding='utf-8') as f: system_svg = f.read() @@ -60,42 +60,42 @@ def get_inline_ui_html(): '', f'' ) - + html_content = html_content.replace( '', f'' ) - + # Replace SVG references html_content = html_content.replace( '', f'' ) - + html_content = html_content.replace( '', f'' ) - + html_content = html_content.replace( '', f'' ) - + html_content = html_content.replace( 'Settings', f'' ) - + return html_content - + except Exception as e: logger.error(f"Error creating embedded web interface: {e}") return "

Error loading embedded interface

" if __name__ == '__main__': - + import pathlib import uvicorn @@ -104,11 +104,11 @@ if __name__ == '__main__': from starlette.staticfiles import StaticFiles import whisperlivekit.web as webpkg - - app = FastAPI() + + app = FastAPI() web_dir = pathlib.Path(webpkg.__file__).parent app.mount("/web", StaticFiles(directory=str(web_dir)), name="web") - + @app.get("/") async def get(): return HTMLResponse(get_inline_ui_html())