ai speaks first

This commit is contained in:
akdeb 2026-04-17 12:29:29 +05:30
parent 88ca1d517c
commit d1b5d1d15d
3 changed files with 71 additions and 31 deletions

View file

@ -1,7 +1,7 @@
import { DurableObject } from "cloudflare:workers";
import type { Env } from "../src/types";
import { createOpusPacketizer } from "../src/opus";
import { getSystemPrompt } from "../src/prompt";
import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt";
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
@ -42,15 +42,20 @@ async function transcribePcm(env: Env, audio: Uint8Array): Promise<string> {
async function generateOpenAIReply(
env: Env,
transcript: string,
transcript: string | null,
history: OpenAIChatMessage[],
): Promise<string> {
const messages: OpenAIChatMessage[] = [
{ role: "system", content: getSystemPrompt(env) },
...history,
{ role: "user", content: transcript },
];
if (transcript && transcript.trim().length > 0) {
messages.push({ role: "user", content: transcript });
} else {
messages.push({ role: "user", content: getFirstMessagePrompt(env) });
}
const response = await fetch("https://api.openai.com/v1/chat/completions", {
method: "POST",
headers: {
@ -98,6 +103,7 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
private audioBuffer = new Uint8Array(0);
private isGenerating = false;
private opusPromise: Promise<Awaited<ReturnType<typeof createOpusPacketizer>>> | null = null;
private hasStartedConversation = false;
constructor(ctx: DurableObjectState, env: Env) {
super(ctx, env);
@ -130,35 +136,8 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
return this.opusPromise;
}
private async handleTurn(
websocket: WebSocket,
) {
private async streamAssistantReply(websocket: WebSocket, reply: string) {
const opus = await this.getOpusPacketizer(websocket);
const pcm = this.audioBuffer;
this.resetBufferedAudio();
if (pcm.byteLength === 0) {
return;
}
websocket.send(createServerMessage("AUDIO.COMMITTED"));
const transcript = await transcribePcm(this.env, pcm);
if (!transcript) {
websocket.send(createServerMessage("RESPONSE.ERROR"));
return;
}
/* Add user transcript DB call here */
const session = await this.loadSessionState();
const reply = await generateOpenAIReply(this.env, transcript, session.history);
session.history.push(
{ role: "user", content: transcript },
{ role: "assistant", content: reply },
);
await this.saveSessionState(session);
/* Add AI transcript DB call here */
opus.reset();
websocket.send(createServerMessage("RESPONSE.CREATED"));
@ -184,6 +163,58 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
}
}
private async handleTurn(
websocket: WebSocket,
) {
const pcm = this.audioBuffer;
this.resetBufferedAudio();
if (pcm.byteLength === 0) {
return;
}
websocket.send(createServerMessage("AUDIO.COMMITTED"));
const transcript = await transcribePcm(this.env, pcm);
if (!transcript) {
websocket.send(createServerMessage("RESPONSE.ERROR"));
return;
}
/* Add user transcript DB call here */
const session = await this.loadSessionState();
const reply = await generateOpenAIReply(this.env, transcript, session.history);
session.history.push(
{ role: "user", content: transcript },
{ role: "assistant", content: reply },
);
await this.saveSessionState(session);
/* Add AI transcript DB call here */
await this.streamAssistantReply(websocket, reply);
}
private async startInitialTurn(websocket: WebSocket) {
if (this.hasStartedConversation || this.isGenerating) {
return;
}
this.hasStartedConversation = true;
this.isGenerating = true;
try {
const session = await this.loadSessionState();
const reply = await generateOpenAIReply(this.env, null, session.history);
session.history.push({ role: "assistant", content: reply });
await this.saveSessionState(session);
/* Add AI transcript DB call here */
await this.streamAssistantReply(websocket, reply);
} catch {
websocket.send(createServerMessage("RESPONSE.ERROR"));
} finally {
this.isGenerating = false;
}
}
async fetch(request: Request): Promise<Response> {
if (request.headers.get("Upgrade") !== "websocket") {
return new Response("Expected websocket", { status: 426 });
@ -194,6 +225,7 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
server.accept();
server.send(JSON.stringify(createAuthMessage()));
void this.startInitialTurn(server);
server.addEventListener("message", (event) => {
void this.ctx.blockConcurrencyWhile(async () => {

View file

@ -3,6 +3,13 @@ import type { Env } from "./types";
const DEFAULT_PROMPT =
"You are an Elato voice companion. Keep responses concise, natural to speak aloud, and friendly for a realtime conversation.";
const DEFAULT_FIRST_MESSAGE =
"Start the conversation now with a short spoken greeting. Introduce yourself naturally in one sentence.";
export function getSystemPrompt(env: Env): string {
return env.ELATO_OPENAI_SYSTEM_PROMPT?.trim() || DEFAULT_PROMPT;
}
export function getFirstMessagePrompt(env: Env): string {
return env.ELATO_OPENAI_FIRST_MESSAGE?.trim() || DEFAULT_FIRST_MESSAGE;
}

View file

@ -4,5 +4,6 @@ export interface Env {
OPENAI_API_KEY: string;
ELATO_OPENAI_MODEL?: string;
ELATO_OPENAI_SYSTEM_PROMPT?: string;
ELATO_OPENAI_FIRST_MESSAGE?: string;
ElatoOpenAiVoiceAgent: DurableObjectNamespace;
}