remove whisper_fastapi_online_server.py
This commit is contained in:
parent
b56fcffde1
commit
2ab3dac948
3 changed files with 5 additions and 88 deletions
10
README.md
10
README.md
|
|
@@ -15,14 +15,14 @@
|
||||||
|
|
||||||
## 🚀 Overview
|
## 🚀 Overview
|
||||||
|
|
||||||
This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with an example frontend that you can customize for your own needs. Everything runs locally on your machine ✨
|
This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional and simple frontend that you can customize for your own needs. Everything runs locally on your machine ✨
|
||||||
|
|
||||||
### 🔄 Architecture
|
### 🔄 Architecture
|
||||||
|
|
||||||
WhisperLiveKit consists of two main components:
|
WhisperLiveKit consists of two main components:
|
||||||
|
|
||||||
- **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription
|
- **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription
|
||||||
- **Frontend Example**: Basic HTML & JavaScript implementation that demonstrates how to capture and stream audio
|
- **Frontend Example**: Basic HTML & JavaScript implementation to capture and stream audio
|
||||||
|
|
||||||
> **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case.
|
> **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case.
|
||||||
|
|
||||||
|
|
@@ -33,13 +33,13 @@ WhisperLiveKit consists of two main components:
|
||||||
- **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers
|
- **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers
|
||||||
- **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server
|
- **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server
|
||||||
|
|
||||||
### ⚙️ Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
|
### ⚙️ Core differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
|
||||||
|
|
||||||
|
- **Automatic Silence Chunking** – Automatically chunks when no audio is detected to limit buffer size
|
||||||
- **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR
|
- **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR
|
||||||
|
- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
|
||||||
- **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing
|
- **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing
|
||||||
- **Buffering Preview** – Displays unvalidated transcription segments
|
- **Buffering Preview** – Displays unvalidated transcription segments
|
||||||
- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
|
|
||||||
- **Apple Silicon Optimized** - MLX backend for faster local processing on Mac
|
|
||||||
|
|
||||||
## 📖 Quick Start
|
## 📖 Quick Start
|
||||||
|
|
||||||
|
|
|
||||||
BIN
demo.png
BIN
demo.png
Binary file not shown.
|
Before Width: | Height: | Size: 424 KiB After Width: | Height: | Size: 438 KiB |
|
|
@@ -1,83 +0,0 @@
|
||||||
from contextlib import asynccontextmanager
|
|
||||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
||||||
from fastapi.responses import HTMLResponse
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
|
||||||
|
|
||||||
from whisperlivekit import WhisperLiveKit, parse_args
|
|
||||||
from whisperlivekit.audio_processor import AudioProcessor
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Install a timestamped root handler, then raise the root threshold to WARNING
# so only this module's logger (set to DEBUG below) emits lower-level records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logging.getLogger().setLevel(logging.WARNING)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Shared WhisperLiveKit instance; assigned by `lifespan` when the app starts.
kit = None
|
|
||||||
|
|
||||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the module-level ``kit`` before the application starts serving.

    The code before ``yield`` runs at startup and binds a fresh
    ``WhisperLiveKit`` instance to the global ``kit``. Nothing is done after
    ``yield``, so there is no shutdown step here.
    """
    global kit
    # Built with default arguments; presumably WhisperLiveKit picks up the
    # parsed CLI configuration on its own -- TODO confirm against the package.
    kit = WhisperLiveKit()

    yield
|
|
||||||
|
|
||||||
# ASGI application; `lifespan` populates the shared `kit` at startup.
app = FastAPI(lifespan=lifespan)

# Fully permissive CORS: any origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# broad -- confirm this is intended before exposing the server beyond localhost.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
async def get():
    """Return the page produced by ``kit.web_interface()`` as an HTML response."""
    page = kit.web_interface()
    return HTMLResponse(page)
|
|
||||||
|
|
||||||
|
|
||||||
async def handle_websocket_results(websocket, results_generator):
    """Forward every item yielded by ``results_generator`` to the client as JSON.

    Any ``Exception`` raised while iterating or sending is logged as a warning
    and ends the loop; it is not re-raised to the caller.
    """
    try:
        async for payload in results_generator:
            await websocket.send_json(payload)
    except Exception as exc:
        # Best-effort sender: report the failure and stop quietly.
        logger.warning(f"Error in WebSocket results handler: {exc}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.websocket("/asr")
async def websocket_endpoint(websocket: WebSocket):
    """Handle one ``/asr`` WebSocket session.

    A dedicated ``AudioProcessor`` is created for the connection. Incoming
    binary messages are passed to ``audio_processor.process_audio`` while a
    background task forwards the processor's results back to the client.

    On exit (client disconnect or any other error) the background sender is
    cancelled *and awaited* before ``audio_processor.cleanup()`` runs, so the
    sender is no longer running when the processor is cleaned up, and cleanup
    is performed even if waiting for the sender is interrupted.
    """
    audio_processor = AudioProcessor()

    await websocket.accept()
    logger.info("WebSocket connection opened.")

    results_generator = await audio_processor.create_tasks()
    websocket_task = asyncio.create_task(
        handle_websocket_results(websocket, results_generator)
    )

    try:
        while True:
            message = await websocket.receive_bytes()
            await audio_processor.process_audio(message)
    except WebSocketDisconnect:
        logger.warning("WebSocket disconnected.")
    finally:
        websocket_task.cancel()
        try:
            # gather(..., return_exceptions=True) collects the sender's own
            # CancelledError as a result instead of raising it, while a
            # cancellation of this endpoint itself still propagates.
            await asyncio.gather(websocket_task, return_exceptions=True)
        finally:
            await audio_processor.cleanup()
            logger.info("WebSocket endpoint cleaned up.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    args = parse_args()

    # Host and port come from the parsed command-line arguments.
    server_options = {
        "host": args.host,
        "port": args.port,
        "reload": False,
        "log_level": "info",
        "lifespan": "on",
    }
    uvicorn.run("whisper_fastapi_online_server:app", **server_options)
|
|
||||||
Loading…
Reference in a new issue