AIvoices/frontend-nextjs/app/components/Realtime/App.tsx
2025-06-11 14:57:14 +01:00

412 lines
12 KiB
TypeScript

"use client";
import React, { useEffect, useRef, useState } from "react";
import { useSearchParams } from "next/navigation";
import { v4 as uuidv4 } from "uuid";
// UI components
import BottomToolbar from "./components/BottomToolbar";
// Types
import { AgentConfig, SessionStatus } from "@/app/components/Realtime/types";
// Context providers & hooks
import { useTranscript } from "@/app/components/Realtime/contexts/TranscriptContext";
import { useEvent } from "@/app/components/Realtime/contexts/EventContext";
import { useHandleServerEvent } from "./hooks/useHandleServerEvent";
// Utilities
import { createRealtimeConnection } from "./lib/realtimeConnection";
import { toast } from "@/components/ui/use-toast";
import { Sheet, SheetContent, SheetTrigger } from "@/components/ui/sheet";
import Transcript from "./components/Transcript";
import { useMediaQuery } from "@/hooks/useMediaQuery";
import { getPersonalityById } from "@/db/personalities";
import { createClient } from "@/utils/supabase/client";
/** Props accepted by the realtime chat `App` component. */
interface AppProps {
  /** Id handed to `getPersonalityById` to load the personality to talk to. */
  personalityIdState: string;
  /** Selects the doctor-specific opening prompt; also forwarded to the toolbar and transcript. */
  isDoctor: boolean;
  /** Id of the current user; forwarded to the transcript. */
  userId: string;
}
/**
 * Stops every outgoing media track of `pc` and closes the connection.
 * Kept outside the component because it touches no React state.
 */
function closePeerConnection(pc: RTCPeerConnection): void {
  pc.getSenders().forEach((sender) => sender.track?.stop());
  pc.close();
}

/**
 * Realtime chat widget: a toolbar button that opens a side/bottom sheet with
 * the transcript and manages a realtime peer connection + data channel.
 *
 * Lifecycle handled here:
 * - loads the personality identified by `personalityIdState` (renders nothing
 *   until it is available);
 * - connects/disconnects when the toolbar button is toggled or the sheet closes;
 * - releases the peer connection when the component unmounts;
 * - persists a few UI preferences in `localStorage`.
 *
 * @param personalityIdState id of the personality to load
 * @param isDoctor selects the doctor-specific opening prompt
 * @param userId forwarded to the transcript
 */
function App({ personalityIdState, isDoctor, userId }: AppProps) {
  // Create the client once per mounted component: it is an effect dependency
  // and a child prop, so its identity must not change on every render.
  const [supabase] = useState(() => createClient());
  const { transcriptItems, addTranscriptMessage } = useTranscript();
  const { logClientEvent, logServerEvent } = useEvent();

  const [selectedAgentName, setSelectedAgentName] = useState<string>("");
  const [selectedAgentConfigSet] = useState<AgentConfig[] | null>(null);
  const [isSheetOpen, setIsSheetOpen] = useState<boolean>(false);
  const [userText, setUserText] = useState<string>("");
  const isMobile = useMediaQuery("(max-width: 768px)");
  const [personality, setPersonality] = useState<IPersonality | null>(null);

  // Load the personality; ignore a response that arrives after the id changed
  // or the component unmounted, and do not let a rejection go unhandled.
  useEffect(() => {
    let isStale = false;
    const fetchPersonality = async () => {
      try {
        const personalityData = await getPersonalityById(
          supabase,
          personalityIdState
        );
        if (!isStale) {
          setPersonality(personalityData);
        }
      } catch (err) {
        console.error("Error fetching personality:", err);
      }
    };
    if (personalityIdState) {
      void fetchPersonality();
    }
    return () => {
      isStale = true;
    };
  }, [personalityIdState, supabase]);

  // Only the setter is used: storing the channel in state forces a re-render
  // so that `canSend` below re-reads `dcRef`.
  const [, setDataChannel] = useState<RTCDataChannel | null>(null);
  const pcRef = useRef<RTCPeerConnection | null>(null);
  const dcRef = useRef<RTCDataChannel | null>(null);
  const audioElementRef = useRef<HTMLAudioElement | null>(null);
  // Bumped on every connect and every teardown; an in-flight connect compares
  // its own number against this to detect that it has been superseded.
  const connectionAttemptRef = useRef(0);

  const [sessionStatus, setSessionStatus] =
    useState<SessionStatus>("DISCONNECTED");
  const [isEventsPaneExpanded, setIsEventsPaneExpanded] =
    useState<boolean>(true);
  const [isPTTActive, setIsPTTActive] = useState<boolean>(false);
  const [, setIsPTTUserSpeaking] = useState<boolean>(false);
  const [isAudioPlaybackEnabled, setIsAudioPlaybackEnabled] =
    useState<boolean>(true);

  /**
   * Sends `eventObj` as JSON over the data channel and logs it; when the
   * channel is missing or not open, logs an error instead of sending.
   * `eventObj` stays `any` because it is handed to project-typed loggers.
   */
  const sendClientEvent = (eventObj: any, eventNameSuffix = "") => {
    const channel = dcRef.current;
    if (channel && channel.readyState === "open") {
      logClientEvent(eventObj, eventNameSuffix);
      channel.send(JSON.stringify(eventObj));
      return;
    }
    logClientEvent(
      { attemptedEvent: eventObj.type },
      "error.data_channel_not_open"
    );
    console.error(
      "Failed to send message - no data channel available",
      eventObj
    );
  };

  const handleServerEventRef = useHandleServerEvent({
    setSessionStatus,
    selectedAgentName,
    selectedAgentConfigSet,
    sendClientEvent,
    setSelectedAgentName,
  });

  // Connect as soon as an agent is selected. Deliberately keyed on the agent
  // name only, as in the original code.
  useEffect(() => {
    if (selectedAgentName && sessionStatus === "DISCONNECTED") {
      void connectToRealtime();
    }
  }, [selectedAgentName]);

  // Configure the session (and trigger the opening message) once connected.
  useEffect(() => {
    if (sessionStatus === "CONNECTED") {
      updateSession(true);
    }
  }, [sessionStatus]);

  /** Drops the current connection (if any) and invalidates in-flight connects. Touches refs only. */
  const releaseConnection = () => {
    connectionAttemptRef.current += 1;
    if (pcRef.current) {
      closePeerConnection(pcRef.current);
      pcRef.current = null;
    }
    dcRef.current = null;
  };

  // Release the peer connection and its media tracks when the component
  // unmounts. `releaseConnection` only touches refs, so the closure captured
  // on mount stays valid.
  useEffect(() => {
    return () => releaseConnection();
  }, []);

  /**
   * Requests a session token from `/api/session`.
   * @returns the ephemeral key, or `null` (after resetting the status and
   * showing a toast) when the response carries no `client_secret.value`.
   */
  const fetchEphemeralKey = async (): Promise<string | null> => {
    logClientEvent({ url: "/session" }, "fetch_session_token_request");
    const tokenResponse = await fetch("/api/session");
    const data = await tokenResponse.json();
    logServerEvent(data, "fetch_session_token_response");

    if (!data.client_secret?.value) {
      logClientEvent(data, "error.no_ephemeral_key");
      setSessionStatus("DISCONNECTED");
      toast({
        description: "Your API key is likely invalid. Please add it to your env variables.",
      });
      return null;
    }
    return data.client_secret.value;
  };

  /**
   * Opens the realtime connection and wires the data-channel listeners.
   * Does nothing unless the status is DISCONNECTED. If a disconnect or unmount
   * happens while the connection is being established, the freshly created
   * connection is closed instead of being installed.
   * NOTE(review): this function never sets CONNECTED itself; presumably
   * `useHandleServerEvent` does so via `setSessionStatus` - confirm in the hook.
   */
  const connectToRealtime = async () => {
    if (sessionStatus !== "DISCONNECTED") return;

    const attempt = ++connectionAttemptRef.current;
    const isCurrentAttempt = () => attempt === connectionAttemptRef.current;
    setSessionStatus("CONNECTING");

    try {
      const ephemeralKey = await fetchEphemeralKey();
      if (!ephemeralKey || !isCurrentAttempt()) {
        return;
      }

      if (!audioElementRef.current) {
        audioElementRef.current = document.createElement("audio");
      }
      audioElementRef.current.autoplay = isAudioPlaybackEnabled;

      const { pc, dc } = await createRealtimeConnection(
        ephemeralKey,
        audioElementRef
      );
      if (!isCurrentAttempt()) {
        // Torn down while connecting: release what was just created.
        closePeerConnection(pc);
        return;
      }
      pcRef.current = pc;
      dcRef.current = dc;

      dc.addEventListener("open", () => {
        logClientEvent({}, "data_channel.open");
      });
      dc.addEventListener("close", () => {
        logClientEvent({}, "data_channel.close");
      });
      dc.addEventListener("error", (err: Event) => {
        logClientEvent({ error: err }, "data_channel.error");
      });
      dc.addEventListener("message", (e: MessageEvent) => {
        let serverEvent: ReturnType<typeof JSON.parse>;
        try {
          serverEvent = JSON.parse(e.data);
        } catch (err) {
          // A malformed frame must not throw out of the listener.
          logClientEvent({ error: String(err) }, "error.invalid_server_event");
          console.error("Failed to parse server event", err);
          return;
        }
        handleServerEventRef.current(serverEvent);
      });

      setDataChannel(dc);
    } catch (err) {
      console.error("Error connecting to realtime:", err);
      // Do not clobber the status of a newer attempt.
      if (isCurrentAttempt()) {
        setSessionStatus("DISCONNECTED");
      }
    }
  };

  /** Tears down the connection and resets all connection-related state. */
  const disconnectFromRealtime = () => {
    releaseConnection();
    setDataChannel(null);
    setSessionStatus("DISCONNECTED");
    setIsPTTUserSpeaking(false);
    logClientEvent({}, "disconnected");
  };

  /** Adds `text` to the transcript as a user message, sends it, and requests a response. */
  const sendSimulatedUserMessage = (text: string) => {
    const id = uuidv4().slice(0, 32);
    addTranscriptMessage(id, "user", text, true);

    sendClientEvent(
      {
        type: "conversation.item.create",
        item: {
          id,
          type: "message",
          role: "user",
          content: [{ type: "input_text", text }],
        },
      },
      "(simulated user text message)"
    );
    sendClientEvent(
      { type: "response.create" },
      "(trigger response after simulated user text message)"
    );
  };

  /** Builds the opening instruction, preferring the personality's own first-message prompt. */
  const createFirstMessage = () => {
    const firstMessagePrompt = personality?.first_message_prompt;
    return firstMessagePrompt
      ? `Always start the conversation following these instructions from the user: ${firstMessagePrompt}`
      : "The user is initiating a new chat here. Say something!";
  };

  /**
   * Clears the input audio buffer and sends a `session.update` with the
   * current turn-detection and tool settings.
   * @param shouldTriggerResponse when true, also sends the opening message
   */
  const updateSession = (shouldTriggerResponse: boolean = false) => {
    sendClientEvent(
      { type: "input_audio_buffer.clear" },
      "clear audio buffer on session update"
    );

    const currentAgent = selectedAgentConfigSet?.find(
      (a) => a.name === selectedAgentName
    );

    // Turn detection is disabled (null) while push-to-talk is active.
    const turnDetection = isPTTActive
      ? null
      : {
          type: "server_vad",
          threshold: 0.5,
          prefix_padding_ms: 300,
          silence_duration_ms: 200,
          create_response: true,
        };

    const tools = currentAgent?.tools || [];

    const sessionUpdateEvent = {
      type: "session.update",
      session: {
        modalities: ["text", "audio"],
        input_audio_format: "pcm16",
        output_audio_format: "pcm16",
        input_audio_transcription: { model: "whisper-1" },
        turn_detection: turnDetection,
        tools,
      },
    };
    sendClientEvent(sessionUpdateEvent);

    if (shouldTriggerResponse) {
      sendSimulatedUserMessage(
        isDoctor
          ? "Ask the doctor if everything is good and how you can help them and their patient."
          : createFirstMessage()
      );
    }
  };

  /** Toolbar toggle: connect and open the sheet when disconnected, otherwise disconnect and close it. */
  const onToggleConnection = () => {
    if (sessionStatus === "DISCONNECTED") {
      void connectToRealtime();
      setIsSheetOpen(true);
    } else {
      // Already connected or connecting; disconnectFromRealtime resets the status.
      disconnectFromRealtime();
      setIsSheetOpen(false);
    }
  };

  /** Truncates and cancels the most recent assistant message unless it is already DONE. */
  const cancelAssistantSpeech = () => {
    const mostRecentAssistantMessage = [...transcriptItems]
      .reverse()
      .find((item) => item.role === "assistant");

    if (!mostRecentAssistantMessage) {
      console.warn("can't cancel, no recent assistant message found");
      return;
    }
    if (mostRecentAssistantMessage.status === "DONE") {
      console.log("No truncation needed, message is DONE");
      return;
    }

    sendClientEvent({
      type: "conversation.item.truncate",
      item_id: mostRecentAssistantMessage.itemId,
      content_index: 0,
      audio_end_ms: Date.now() - mostRecentAssistantMessage.createdAtMs,
    });
    sendClientEvent(
      { type: "response.cancel" },
      "(cancel due to user interruption)"
    );
  };

  /** Sends the typed text (if non-blank) as a user message and requests a response. */
  const handleSendTextMessage = () => {
    const text = userText.trim();
    if (!text) return;

    cancelAssistantSpeech();
    sendClientEvent(
      {
        type: "conversation.item.create",
        item: {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text }],
        },
      },
      "(send user text message)"
    );
    setUserText("");
    sendClientEvent({ type: "response.create" }, "trigger response");
  };

  // Restore persisted UI preferences once on mount.
  useEffect(() => {
    const storedPushToTalkUI = localStorage.getItem("pushToTalkUI");
    if (storedPushToTalkUI) {
      setIsPTTActive(storedPushToTalkUI === "true");
    }
    const storedLogsExpanded = localStorage.getItem("logsExpanded");
    if (storedLogsExpanded) {
      setIsEventsPaneExpanded(storedLogsExpanded === "true");
    }
    const storedAudioPlaybackEnabled = localStorage.getItem(
      "audioPlaybackEnabled"
    );
    if (storedAudioPlaybackEnabled) {
      setIsAudioPlaybackEnabled(storedAudioPlaybackEnabled === "true");
    }
  }, []);

  // Persist each preference whenever it changes.
  useEffect(() => {
    localStorage.setItem("pushToTalkUI", isPTTActive.toString());
  }, [isPTTActive]);

  useEffect(() => {
    localStorage.setItem("logsExpanded", isEventsPaneExpanded.toString());
  }, [isEventsPaneExpanded]);

  useEffect(() => {
    localStorage.setItem(
      "audioPlaybackEnabled",
      isAudioPlaybackEnabled.toString()
    );
  }, [isAudioPlaybackEnabled]);

  // Start or pause the audio element when the playback preference flips.
  useEffect(() => {
    const audioElement = audioElementRef.current;
    if (!audioElement) return;

    if (isAudioPlaybackEnabled) {
      audioElement.play().catch((err) => {
        console.warn("Autoplay may be blocked by browser:", err);
      });
    } else {
      audioElement.pause();
    }
  }, [isAudioPlaybackEnabled]);

  /** Tracks the sheet's open state; closing the sheet ends an active or pending session. */
  const handleSheetOpenChange = (open: boolean) => {
    setIsSheetOpen(open);
    const isSessionActive =
      sessionStatus === "CONNECTED" || sessionStatus === "CONNECTING";
    if (!open && isSessionActive) {
      // disconnectFromRealtime resets the status itself.
      disconnectFromRealtime();
    }
  };

  if (!personality) {
    return null;
  }

  return (
    <Sheet open={isSheetOpen} onOpenChange={handleSheetOpenChange}>
      <div className="inline-block">
        <BottomToolbar
          sessionStatus={sessionStatus}
          onToggleConnection={onToggleConnection}
          isDoctor={isDoctor}
          personality={personality}
        />
      </div>
      <SheetContent
        side={isMobile ? "bottom" : "right"}
        className="h-[80vh] md:h-full p-0"
        style={{ maxWidth: isMobile ? "100%" : "50%" }}
      >
        <div className="flex flex-col h-full">
          <div className="flex-1 overflow-hidden">
            <Transcript
              userText={userText}
              setUserText={setUserText}
              onSendMessage={handleSendTextMessage}
              canSend={
                sessionStatus === "CONNECTED" &&
                dcRef.current?.readyState === "open"
              }
              personality={personality}
              userId={userId}
              isDoctor={isDoctor}
              supabase={supabase}
            />
          </div>
        </div>
      </SheetContent>
    </Sheet>
  );
}

export default App;