From 7e494607b9d9f3ceac7d31c303ab385befda58b1 Mon Sep 17 00:00:00 2001 From: akdeb Date: Fri, 17 Apr 2026 13:46:46 +0530 Subject: [PATCH] test new tts --- server-cloudflare/models/openai.ts | 54 +++++++++++------------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/server-cloudflare/models/openai.ts b/server-cloudflare/models/openai.ts index 2a68801..8fd912a 100644 --- a/server-cloudflare/models/openai.ts +++ b/server-cloudflare/models/openai.ts @@ -1,5 +1,5 @@ import { DurableObject } from "cloudflare:workers"; -import { WorkersAIFluxSTT, type TranscriberSession } from "@cloudflare/voice"; +import { WorkersAIFluxSTT, WorkersAITTS, type TranscriberSession } from "@cloudflare/voice"; import type { Env } from "../src/types"; import { createOpusPacketizer } from "../src/opus"; import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt"; @@ -88,24 +88,22 @@ async function generateOpenAIReply( ); } -async function synthesizeSpeech(env: Env, text: string): Promise { +const ttsProviderFor = (env: Env) => new WorkersAITTS(env.AI, { + model: "@cf/deepgram/aura-1", + speaker: "asteria", +}); + +async function synthesizeSpeech(env: Env, text: string): Promise { if (!env.AI) { throw new Error("Cloudflare AI binding is missing"); } - return env.AI.run( - "@cf/deepgram/aura-1", - { - text, - speaker: "asteria", - encoding: "linear16", - container: "none", - sample_rate: AUDIO_OUTPUT_SAMPLE_RATE, - }, - { - returnRawResponse: true, - }, - ) as Promise; + const audio = await ttsProviderFor(env).synthesize(text); + if (!audio) { + throw new Error("WorkersAITTS returned no audio"); + } + + return audio; } export class ElatoOpenAiVoiceAgent extends DurableObject { @@ -189,29 +187,15 @@ export class ElatoOpenAiVoiceAgent extends DurableObject { opus.reset(); websocket.send(createServerMessage("RESPONSE.CREATED")); - const ttsResponse = await synthesizeSpeech(this.env, reply); - if (!ttsResponse.ok || !ttsResponse.body) { - console.error( - `[cloudflare][tts] request failed: ${ttsResponse.status} ${ttsResponse.statusText}`, - ); - websocket.send(createServerMessage("RESPONSE.ERROR")); - return; - } - - const reader = ttsResponse.body.getReader(); try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; - if (value) { - opus.push(value); - } - } + const audio = await synthesizeSpeech(this.env, reply); + opus.push(new Uint8Array(audio)); opus.flush(true); websocket.send(createServerMessage("RESPONSE.COMPLETE", { volume_control: 100 })); - console.log(`[cloudflare][tts] streamed reply successfully (${reply.length} chars)`); - } finally { - reader.releaseLock(); + console.log(`[cloudflare][tts] synthesized reply successfully via WorkersAITTS (${reply.length} chars)`); + } catch (error) { + console.error(`[cloudflare][tts] ${errorMessage(error)}`); + websocket.send(createServerMessage("RESPONSE.ERROR")); } }