separate parts

This commit is contained in:
akdeb 2026-04-17 15:14:40 +05:30
parent 4a6368b988
commit 299197e839
7 changed files with 127 additions and 104 deletions

View file

@ -0,0 +1,54 @@
import type { Env } from "../src/types";
import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt";
export interface ChatMessage {
role: "system" | "user" | "assistant";
content: string;
}
export async function generateOpenAIReply(
env: Env,
transcript: string | null,
history: ChatMessage[],
): Promise<string> {
if (!env.OPENAI_API_KEY?.trim()) {
throw new Error("OPENAI_API_KEY is missing");
}
const messages: ChatMessage[] = [
{ role: "system", content: getSystemPrompt(env) },
...history,
];
if (transcript && transcript.trim().length > 0) {
messages.push({ role: "user", content: transcript });
} else {
messages.push({ role: "user", content: getFirstMessagePrompt(env) });
}
const response = await fetch("https://api.openai.com/v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${env.OPENAI_API_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: env.ELATO_OPENAI_MODEL || "gpt-4.1-mini",
messages,
temperature: 0.7,
}),
});
if (!response.ok) {
throw new Error(`OpenAI request failed: ${response.status} ${await response.text()}`);
}
const data = (await response.json()) as {
choices?: Array<{ message?: { content?: string } }>;
};
return (
data.choices?.[0]?.message?.content?.trim() ||
"I heard you, but I do not have a response yet."
);
}

View file

@ -1,16 +1,10 @@
import { DurableObject } from "cloudflare:workers";
import { WorkersAIFluxSTT, type TranscriberSession } from "@cloudflare/voice";
import type { Env } from "../src/types";
import { createOpusPacketizer } from "../src/opus";
import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt";
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
const STT_SAMPLE_RATE = 16_000;
interface OpenAIChatMessage {
role: "system" | "user" | "assistant";
content: string;
}
import { createSttSession } from "./stt";
import { generateOpenAIReply, type ChatMessage } from "./llm";
import { synthesizeSpeech } from "./tts";
import type { TranscriberSession } from "@cloudflare/voice";
function createAuthMessage() {
return {
@ -37,80 +31,13 @@ function errorMessage(error: unknown): string {
return String(error);
}
async function generateOpenAIReply(
env: Env,
transcript: string | null,
history: OpenAIChatMessage[],
): Promise<string> {
if (!env.OPENAI_API_KEY?.trim()) {
throw new Error("OPENAI_API_KEY is missing");
}
const messages: OpenAIChatMessage[] = [
{ role: "system", content: getSystemPrompt(env) },
...history,
];
if (transcript && transcript.trim().length > 0) {
messages.push({ role: "user", content: transcript });
} else {
messages.push({ role: "user", content: getFirstMessagePrompt(env) });
}
const response = await fetch("https://api.openai.com/v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${env.OPENAI_API_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: env.ELATO_OPENAI_MODEL || "gpt-4.1-mini",
messages,
temperature: 0.7,
}),
});
if (!response.ok) {
throw new Error(`OpenAI request failed: ${response.status} ${await response.text()}`);
}
const data = (await response.json()) as {
choices?: Array<{ message?: { content?: string } }>;
};
return (
data.choices?.[0]?.message?.content?.trim() ||
"I heard you, but I do not have a response yet."
);
}
async function synthesizeSpeech(env: Env, text: string): Promise<Response> {
if (!env.AI) {
throw new Error("Cloudflare AI binding is missing");
}
return env.AI.run(
"@cf/deepgram/aura-2-en",
{
text,
speaker: "asteria",
encoding: "linear16",
container: "none",
sample_rate: AUDIO_OUTPUT_SAMPLE_RATE,
},
{
returnRawResponse: true,
},
) as Promise<Response>;
}
export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
export class ElatoVoiceSession extends DurableObject<Env> {
private isGenerating = false;
private opusPromise: Promise<Awaited<ReturnType<typeof createOpusPacketizer>>> | null = null;
private hasStartedConversation = false;
private transcriberSession: TranscriberSession | null = null;
private currentWebSocket: WebSocket | null = null;
private history: OpenAIChatMessage[] = [];
private history: ChatMessage[] = [];
constructor(ctx: DurableObjectState, env: Env) {
super(ctx, env);
@ -141,23 +68,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
return;
}
const transcriber = new WorkersAIFluxSTT(this.env.AI, {
sampleRate: STT_SAMPLE_RATE,
eotTimeoutMs: 1000,
});
this.transcriberSession = transcriber.createSession({
onInterim: (text) => {
if (text.trim()) {
console.log(`[cloudflare][stt] interim: ${text}`);
}
this.transcriberSession = createSttSession(
this.env,
(text) => {
console.log(`[cloudflare][stt] interim: ${text}`);
},
onUtterance: (transcript) => {
const text = transcript.trim();
if (!text) {
return;
}
(text) => {
void this.ctx.blockConcurrencyWhile(async () => {
if (!this.currentWebSocket || this.isGenerating) {
return;
@ -174,7 +90,7 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
}
});
},
});
);
console.log("[cloudflare][stt] started continuous Flux session");
}
@ -262,11 +178,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
return new Response("Expected websocket", { status: 426 });
}
this.resetSessionState();
const pair = new WebSocketPair();
const [client, server] = Object.values(pair);
server.accept();
this.resetSessionState();
this.currentWebSocket = server;
this.ensureTranscriberSession();

View file

@ -0,0 +1,29 @@
import { WorkersAIFluxSTT, type TranscriberSession } from "@cloudflare/voice";
import type { Env } from "../src/types";
const STT_SAMPLE_RATE = 16_000;
export function createSttSession(
env: Env,
onInterim: (text: string) => void,
onUtterance: (transcript: string) => void,
): TranscriberSession {
const transcriber = new WorkersAIFluxSTT(env.AI, {
sampleRate: STT_SAMPLE_RATE,
eotTimeoutMs: 1000,
});
return transcriber.createSession({
onInterim: (text) => {
if (text.trim()) {
onInterim(text);
}
},
onUtterance: (transcript) => {
const text = transcript.trim();
if (text) {
onUtterance(text);
}
},
});
}

View file

@ -0,0 +1,23 @@
import type { Env } from "../src/types";
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
export async function synthesizeSpeech(env: Env, text: string): Promise<Response> {
if (!env.AI) {
throw new Error("Cloudflare AI binding is missing");
}
return env.AI.run(
"@cf/deepgram/aura-2-en",
{
text,
speaker: "asteria",
encoding: "linear16",
container: "none",
sample_rate: AUDIO_OUTPUT_SAMPLE_RATE,
},
{
returnRawResponse: true,
},
) as Promise<Response>;
}

View file

@ -1,6 +1,6 @@
import type { Env } from "./types";
export { ElatoOpenAiVoiceAgent } from "../models/openai";
export { ElatoVoiceSession } from "../models/session";
export default {
async fetch(request: Request, env: Env): Promise<Response> {
@ -13,8 +13,8 @@ export default {
if (url.pathname === "/ws/esp32" || url.pathname.startsWith("/ws/esp32/")) {
/* Add AUTH here */
const stub = env.ElatoOpenAiVoiceAgent.get(
env.ElatoOpenAiVoiceAgent.newUniqueId(),
const stub = env.ElatoVoiceSession.get(
env.ElatoVoiceSession.newUniqueId(),
);
return stub.fetch(request);
}

View file

@ -5,5 +5,5 @@ export interface Env {
ELATO_OPENAI_MODEL?: string;
ELATO_OPENAI_SYSTEM_PROMPT?: string;
ELATO_OPENAI_FIRST_MESSAGE?: string;
ElatoOpenAiVoiceAgent: DurableObjectNamespace;
ElatoVoiceSession: DurableObjectNamespace;
}

View file

@ -7,12 +7,12 @@ compatibility_flags = ["nodejs_compat"]
binding = "AI"
[[durable_objects.bindings]]
name = "ElatoOpenAiVoiceAgent"
class_name = "ElatoOpenAiVoiceAgent"
name = "ElatoVoiceSession"
class_name = "ElatoVoiceSession"
[[migrations]]
tag = "v1"
new_sqlite_classes = ["ElatoOpenAiVoiceAgent"]
new_sqlite_classes = ["ElatoVoiceSession"]
[observability]
[observability.logs]