adding grok

This commit is contained in:
akdeb 2025-12-20 23:25:04 +07:00
parent bd7c9c8b83
commit 4c329073d8
15 changed files with 442 additions and 67 deletions

View file

@ -17,7 +17,7 @@
# 👾 ElatoAI: Realtime Speech AI Agents for ESP32
Realtime AI Speech powered by SoTA AI voice models like **OpenAI Realtime API**, **Eleven Labs AI Agents**, **Gemini Live API**, **Hume AI EVI-4**, on ESP32, with Secure WebSockets, and Deno Edge Functions for >15-minute uninterrupted conversations globally.
Realtime AI Speech powered by SoTA AI voice models like **OpenAI Realtime API**, **Eleven Labs AI Agents**, **Gemini Live API**, **Hume AI EVI-4**, and **xAI's Grok Voice Agent API** on ESP32, with Secure WebSockets, and Deno Edge Functions for >15-minute uninterrupted conversations globally.
<div align="center" style="margin: 20px 0;">
<!-- <a href="https://www.kickstarter.com/projects/elatoai/elato-make-toys-talk-with-ai-voices" target="_blank">
@ -49,11 +49,12 @@ Realtime AI Speech powered by SoTA AI voice models like **OpenAI Realtime API**,
</div>
## ⚡️ `With SOTA Realtime AI Speech Models on an ESP32`
## ⚡️ `With SoTA Realtime AI Speech Models on an ESP32`
<div align="center" class="flex flex-row gap-4">
<img src="assets/openai.png" alt="OpenAI Realtime API" width="45%">
<img src="assets/gemini.png" alt="Gemini Live API" width="45%">
<img src="assets/grok.svg" alt="Grok AI" width="45%">
<img src="assets/humeai.avif" alt="Hume AI EVI4" width="45%">
<img src="assets/elevenlabs.svg" alt="Eleven Labs AI Agents" width="45%">
</div>
@ -87,7 +88,7 @@ Control your ESP32 AI device from your phone with the Elato AI webapp.
## `🌟 Full feature list`
1. **Realtime Speech-to-Speech**: Instant speech conversion powered by OpenAI's Realtime API, Gemini's Live API, Eleven Labs Conversational AI Agents and Hume AI EVI4.
1. **Realtime Speech-to-Speech**: Instant speech conversion powered by OpenAI's Realtime API, Gemini's Live API, xAI's Grok Voice Agent API, Eleven Labs Conversational AI Agents and Hume AI EVI4.
2. **Create Custom AI Agents**: Create custom agents with different personalities and voices.
3. **Customizable Voices**: Choose from a variety of voices and personalities.
4. **Secure WebSockets**: Reliable, encrypted WebSocket communication.
@ -145,7 +146,7 @@ cp .env.example .env.local
# In .env.local, set your environment variables
# NEXT_PUBLIC_SUPABASE_ANON_KEY=<your-supabase-anon-key>
# OPENAI_API_KEY=<your-openai-api-key>
# OPENAI_API_KEY=<your-openai-api-key> (to test OpenAI in the browser)
# Run the development server
npm run dev
@ -171,6 +172,7 @@ cp .env.example .env
# SUPABASE_KEY=<your-supabase-anon-key>
# OPENAI_API_KEY=<your-openai-api-key>
# GEMINI_API_KEY=<your-gemini-api-key>
# XAI_API_KEY=<your-xai-api-key>
# ELEVENLABS_API_KEY=<your-elevenlabs-api-key>
# HUME_API_KEY=<your-hume-api-key>
@ -226,10 +228,12 @@ flowchart TD
ESP32[ESP32 Device] -->|WebSocket| Edge[Deno Edge Function]
Edge -->|OpenAI API| OpenAI[OpenAI Realtime API]
Edge -->|Gemini API| Gemini[Gemini Live API]
Edge -->|xAI API| xAI[xAI Grok Voice Agent API]
Edge -->|ElevenLabs API| ElevenLabs[ElevenLabs AI Agents]
Edge -->|Hume API| Hume[Hume AI EVI4]
OpenAI --> Edge
Gemini --> Edge
xAI --> Edge
ElevenLabs --> Edge
Hume --> Edge
Edge -->|WebSocket| ESP32
@ -308,6 +312,8 @@ lib_deps =
5. ~~Plug in Eleven Labs API for voice generation~~
6. Add Azure OpenAI Support (easy pickings)
7. Add Cartesia Support (easy pickings)
8. Add Amazon Nova Support
9. Add Deepgram
We welcome contributions
- Fork this repository.

1
assets/grok.svg Normal file
View file

@ -0,0 +1 @@
<svg width="2500" height="938" fill="none" xmlns="http://www.w3.org/2000/svg" class="opacity-80 hover:opacity-100 fill-black dark:fill-white [&amp;&gt;path]:hidden sm:[&amp;&gt;path]:block [&amp;&gt;#mark]:block [&amp;&gt;#furigana]:opacity-60 [&amp;&gt;#subtitle]:opacity-60" data--h-bstatus="0OBSERVED" viewBox="0.3640000000000012 0.5000000000000024 87.27199999999999 31.999999999999996"><path d="M76.446 24.708V8.416h2.576v10.752l5.447-6.257h3.122l-4.9 5.362 4.945 6.435H84.56l-4.006-5.53-1.532-.01v5.54h-2.576zM68.636 24.982c-3.829 0-5.902-2.716-5.902-6.184 0-3.491 2.073-6.184 5.902-6.184 3.852 0 5.903 2.693 5.903 6.184 0 3.468-2.051 6.184-5.903 6.184zm-3.213-6.184c0 2.692 1.458 4.039 3.213 4.039 1.778 0 3.214-1.347 3.214-4.04 0-2.692-1.436-4.06-3.214-4.06-1.755 0-3.213 1.368-3.213 4.06zM55.566 24.708v-9.926l2.165-1.871h4.604v2.19H58.14v9.607h-2.575zM45.719 25.009c-4.909 0-7.836-3.564-7.836-8.424 0-4.906 3.032-8.557 7.931-8.557 3.83 0 6.633 1.962 7.294 5.613h-2.94c-.434-2.076-2.166-3.24-4.353-3.24-3.533 0-5.083 3.058-5.083 6.184 0 3.126 1.55 6.16 5.083 6.16 3.373 0 4.854-2.441 4.968-4.472H45.7v-2.362h7.68l-.013 1.235c0 4.59-1.87 7.863-7.65 7.863zM13.237 21.04l11.082-8.19c.543-.4 1.32-.244 1.578.38 1.363 3.288.754 7.241-1.957 9.955-2.71 2.714-6.482 3.31-9.93 1.954l-3.765 1.745c5.401 3.697 11.96 2.782 16.059-1.324 3.251-3.255 4.258-7.692 3.317-11.693l.008.009c-1.365-5.878.336-8.227 3.82-13.031.082-.114.165-.228.247-.345l-4.585 4.59v-.014L13.234 21.044M10.95 23.031c-3.877-3.707-3.208-9.446.1-12.755 2.446-2.449 6.454-3.448 9.952-1.979L24.76 6.56c-.677-.49-1.545-1.017-2.54-1.387A12.465 12.465 0 0 0 8.675 7.901c-3.519 3.523-4.625 8.94-2.725 13.561 1.42 3.454-.907 5.898-3.251 8.364-.83.874-1.664 1.748-2.335 2.674l10.583-9.466" fill="currentColor" data--h-bstatus="0OBSERVED"/></svg>

After

Width:  |  Height:  |  Size: 1.8 KiB

View file

@ -33,11 +33,11 @@ volatile bool sleepRequested = false;
*/
#ifdef DEV_MODE
const char *ws_server = "192.168.1.37";
const char *ws_server = "172.20.10.2";
const uint16_t ws_port = 8000;
const char *ws_path = "/";
// Backend server details
const char *backend_server = "192.168.1.37";
const char *backend_server = "172.20.10.2";
const uint16_t backend_port = 3000;
#elif defined(PROD_MODE)

View file

@ -13,7 +13,7 @@ import { v4 as uuidv4 } from 'uuid';
import { toast } from "@/components/ui/use-toast";
import { useRouter } from "next/navigation";
import { z } from "zod";
import { emotionOptions, geminiVoices, openaiVoices, r2UrlAudio } from "@/lib/data";
import { emotionOptions, geminiVoices, grokVoices, openaiVoices, r2UrlAudio } from "@/lib/data";
import EmojiComponent from "./EmojiComponent";
import { PitchFactors } from "@/lib/utils";
import { Slider } from "@/components/ui/slider";
@ -26,7 +26,7 @@ interface SettingsDashboardProps {
}
const formSchema = z.object({
provider: z.enum(["openai", "gemini"]),
provider: z.enum(["openai", "gemini", "grok"]),
title: z.string().min(2, "Minimum 2 characters").max(50, "Maximum 50 characters"),
description: z.string().min(50, "Minimum 50 characters").max(200, "Maximum 200 characters"),
prompt: z.string().min(100, "Minimum 100 characters").max(1000, "Maximum 1000 characters"),
@ -66,6 +66,7 @@ const SettingsDashboard: React.FC<SettingsDashboardProps> = ({
const [touchedFields, setTouchedFields] = useState<Record<string, boolean>>({});
const [formErrors, setFormErrors] = useState<Partial<Record<keyof FormData | 'features', string>>>({});
const [previewingVoice, setPreviewingVoice] = useState<string | null>(null);
const [expandedProvider, setExpandedProvider] = useState<ModelProvider | null>("openai");
const handleBlur = (field: keyof FormData | 'features') => {
// Mark the field as touched
@ -249,6 +250,19 @@ const SettingsDashboard: React.FC<SettingsDashboardProps> = ({
}
}
/**
 * Resolves the badge text and Tailwind classes shown on a voice card for the
 * given model provider. Providers without a dedicated style fall back to a
 * gray badge labelled with the raw provider id.
 */
const getProviderBadge = (provider: ModelProvider) => {
  switch (provider) {
    case "openai":
      return { label: "OpenAI", className: "bg-emerald-500 text-white" };
    case "gemini":
      return { label: "Gemini", className: "bg-purple-500 text-white" };
    case "grok":
      return { label: "Grok", className: "bg-slate-900 text-white" };
    default:
      return { label: provider, className: "bg-gray-600 text-white" };
  }
};
const Heading = () => {
return (
<div className="flex flex-col gap-2">
@ -273,60 +287,98 @@ const SettingsDashboard: React.FC<SettingsDashboardProps> = ({
<div className="space-y-4">
<Label htmlFor="voice">Pick a voice</Label>
<p className="text-sm text-gray-500">
Click a voice to preview how it sounds.
Choose from a list of voices and model providers to create your AI character.
</p>
<div className="overflow-x-auto px-2">
<div className="flex gap-3 w-max py-2">
{[...openaiVoices, ...geminiVoices].map((voice: VoiceType) => (
<div
key={voice.id}
className={`relative rounded-xl border-2 p-4 transition-all cursor-pointer hover:scale-[1.02] hover:shadow-lg w-48 flex-shrink-0 ${formData.voice === voice.id
? `border-blue-500 shadow-lg ${voice.color} ring-2 ring-blue-200`
: `border-gray-200 hover:border-gray-300 ${voice.color} hover:shadow-md`
}`}
onClick={() => {
setFormData(prev => ({
<div className="grid grid-cols-3 gap-3">
{([
{ provider: "openai" as ModelProvider, label: "OpenAI" },
{ provider: "gemini" as ModelProvider, label: "Gemini" },
{ provider: "grok" as ModelProvider, label: "Grok" },
]).map((p) => (
<button
key={p.provider}
type="button"
className={`text-left bg-white shadow-md rounded-xl border-2 p-4 transition-all hover:shadow-md ${expandedProvider === p.provider
? "border-blue-500 ring-2 ring-blue-200"
: "border-gray-200 hover:border-gray-300"
}`}
onClick={() => {
setExpandedProvider(prev => prev === p.provider ? null : p.provider);
setFormData(prev => {
const switchingProvider = prev.provider !== p.provider;
return {
...prev,
provider: voice.provider as ModelProvider,
voice: voice.id
}));
previewVoice(voice);
}}
>
<div className="flex flex-col">
<div className="flex flex-col items-center gap-3">
<div className="text-3xl">
<EmojiComponent emoji={voice.emoji} />
</div>
<div className="flex flex-col text-center">
<span className="font-semibold text-gray-900">{voice.name}</span>
<span className="text-xs text-gray-600 mt-1">{voice.description}</span>
<div className={`inline-flex items-center justify-center px-2 py-1 rounded-full text-xs font-medium mt-2 ${voice.provider === 'openai' ? 'bg-emerald-500 text-white' : 'bg-purple-500 text-white'
}`}>
{voice.provider === 'openai' ? 'OpenAI' : 'Gemini'}
</div>
</div>
</div>
{previewingVoice === voice.id && (
<div className="absolute top-3 right-3">
<div className="animate-pulse text-blue-600 bg-white rounded-full p-2 shadow-lg">
<Volume2 size={16} />
</div>
</div>
)}
{formData.voice === voice.id && (
<div className="absolute -top-2 -right-2">
<div className="bg-blue-500 text-white rounded-full p-1.5 shadow-lg">
<Check size={12} />
</div>
</div>
)}
provider: p.provider,
voice: switchingProvider ? "" : prev.voice,
};
});
}}
>
<div className="flex flex-col gap-1">
<div className="flex flex-col sm:flex-row gap-2 items-center justify-between">
<span className="font-semibold text-gray-900">{p.label}</span>
<span className="text-xs text-gray-500">
{p.provider === "openai" ? openaiVoices.length : p.provider === "gemini" ? geminiVoices.length : grokVoices.length} voices
</span>
</div>
</div>
))}
</div>
</button>
))}
</div>
{expandedProvider && (
<div className="overflow-x-auto px-2">
<div className="flex gap-3 w-max py-2">
{(expandedProvider === "openai" ? openaiVoices : expandedProvider === "gemini" ? geminiVoices : grokVoices).map((voice: VoiceType) => (
<div
key={voice.id}
className={`relative rounded-xl border-2 p-4 transition-all cursor-pointer hover:scale-[1.02] hover:shadow-lg w-48 flex-shrink-0 ${formData.voice === voice.id
? `border-blue-500 shadow-lg ${voice.color} ring-2 ring-blue-200`
: `border-gray-200 hover:border-gray-300 ${voice.color} hover:shadow-md`
}`}
onClick={() => {
setFormData(prev => ({
...prev,
provider: voice.provider as ModelProvider,
voice: voice.id
}));
previewVoice(voice);
}}
>
<div className="flex flex-col">
<div className="flex flex-col items-center gap-3">
<div className="text-3xl">
<EmojiComponent emoji={voice.emoji} />
</div>
<div className="flex flex-col text-center">
<span className="font-semibold text-gray-900">{voice.name}</span>
<span className="text-xs text-gray-600 mt-1">{voice.description}</span>
<div className={`inline-flex items-center justify-center px-2 py-1 rounded-full text-xs font-medium mt-2 ${getProviderBadge(voice.provider as ModelProvider).className}`}>
{getProviderBadge(voice.provider as ModelProvider).label}
</div>
</div>
</div>
{previewingVoice === voice.id && (
<div className="absolute top-3 right-3">
<div className="animate-pulse text-blue-600 bg-white rounded-full p-2 shadow-lg">
<Volume2 size={16} />
</div>
</div>
)}
{formData.voice === voice.id && (
<div className="absolute -top-2 -right-2">
<div className="bg-blue-500 text-white rounded-full p-1.5 shadow-lg">
<Check size={12} />
</div>
</div>
)}
</div>
</div>
))}
</div>
</div>
)}
</div>
{/* ElevenLabs Alternative */}

View file

@ -54,7 +54,7 @@ export default function ProductsSection() {
<div className="container px-4 md:px-6 max-w-screen-sm mx-auto">
<div className="text-center mb-10">
<h2 className="text-3xl md:text-4xl font-bold mb-6 text-gray-800">
Our Product
Our Products
</h2>
<p className="text-lg text-gray-600 mt-2">
Everything you need to bring conversational AI to your world

View file

@ -4,15 +4,16 @@ import React from "react";
/** Props accepted by the `YoutubeDemo` component. */
interface YoutubeDemoProps {
/** Caption for the demo. NOTE(review): how it is rendered is not visible in this excerpt; confirm. */
caption: string;
/** YouTube video id; interpolated into the iframe `src` as `https://www.youtube.com/embed/<youtubeId>`. */
youtubeId: string;
}
export default function YoutubeDemo({ caption }: YoutubeDemoProps) {
export default function YoutubeDemo({ caption, youtubeId }: YoutubeDemoProps) {
return <div className="w-full max-w-3xl mx-auto">
<div className="relative" style={{ paddingBottom: '56.25%' }}>
<iframe
className="absolute top-0 left-0 w-full h-full rounded-xl shadow-lg"
src="https://www.youtube.com/embed/o1eIAwVll5I"
src={`https://www.youtube.com/embed/${youtubeId}`}
title="Elato Demo"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowFullScreen

View file

@ -124,7 +124,10 @@ export default async function LandingPage() {
</div>
</div>
</section>
<YoutubeDemo caption="Elato AI ESP32-S3 Demo" />
<YoutubeDemo caption="Elato AI ESP32-S3 Demo" youtubeId="o1eIAwVll5I" />
<br/><br />
<YoutubeDemo caption="How to run ElatoAI on your own device" youtubeId="bXrNRpGOJWw" />
{/* Products Section */}
<ProductsSection />

View file

@ -106,6 +106,44 @@ export const openaiVoices: VoiceType[] = [
},
];
/**
 * Voices offered for the "grok" provider.
 *
 * Each row is `[voice id, short description, Tailwind background class]`.
 * The display name of every voice is the same string as its id.
 */
export const grokVoices: VoiceType[] = (
  [
    ["Ara", "Bright", "bg-yellow-100"],
    ["Eve", "Upbeat", "bg-orange-100"],
    ["Leo", "Confident", "bg-blue-100"],
    ["Rex", "Direct", "bg-gray-100"],
    ["Sal", "Warm", "bg-green-100"],
  ] as const
).map(
  ([voiceId, description, color]): VoiceType => ({
    id: voiceId,
    name: voiceId,
    description,
    color,
    provider: "grok",
  }),
);
export const geminiVoices: VoiceType[] = [
{
id: "Zephyr",

View file

@ -114,9 +114,24 @@ declare global {
description: string;
color: string;
emoji?: string;
}
| {
provider: "grok";
id: GrokVoice;
name: string;
description: string;
color: string;
emoji?: string;
};
type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume";
type ModelProvider = "openai" | "gemini" | "grok" | "elevenlabs" | "hume";
/** Voice ids accepted for the "grok" provider. */
type GrokVoice = "Ara" | "Eve" | "Leo" | "Rex" | "Sal";
type GeminiVoice =
| "Zephyr"
@ -162,8 +177,8 @@ declare global {
// characters <-> personalities table
/**
* oai_voice is for the name of any voice. both gemini and openai use this.
* forgot to refactor this, update it for your setup
* oai_voice holds the voice name for any provider; Grok, Gemini and OpenAI all use it.
* The column name is a leftover that was never refactored; consider renaming it for your setup :)
*/
interface IPersonality {
personality_id?: string;

View file

@ -9,6 +9,7 @@ ENCRYPTION_KEY=<ENCRYPTION_KEY>
# Model provider API Keys
OPENAI_API_KEY=<OPENAI_API_KEY>
GEMINI_API_KEY=<GEMINI_API_KEY>
XAI_API_KEY=<XAI_API_KEY>
ELEVENLABS_API_KEY=<ELEVENLABS_API_KEY>
HUME_API_KEY=<HUME_API_KEY>

View file

@ -17,6 +17,7 @@ import { connectToOpenAI } from "./models/openai.ts";
import { connectToGemini } from "./models/gemini.ts";
import { connectToElevenLabs } from "./models/elevenlabs.ts";
import { connectToHume } from "./models/hume.ts";
import { connectToGrok } from "./models/grok.ts";
const server = createServer();
@ -58,7 +59,7 @@ wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => {
ws.send(
JSON.stringify({
type: "auth",
volume_control: user.device?.volume ?? 20,
volume_control: user.device?.volume ?? 100,
is_ota: user.device?.is_ota ?? false,
is_reset: user.device?.is_reset ?? false,
pitch_factor: user.personality?.pitch_factor ?? 1,
@ -84,6 +85,15 @@ wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => {
systemPrompt,
);
break;
case "grok":
await connectToGrok(
ws,
payload,
connectionPcmFile,
firstMessage,
systemPrompt,
);
break;
case "elevenlabs":
const agentId = user.personality?.oai_voice ?? "";
@ -145,7 +155,7 @@ server.on("upgrade", async (req, socket, head) => {
});
});
if (isDev) { // deno run -A --env-file=.env main.ts
if (isDev) { // RUN WITH: deno run -A --env-file=.env main.ts
const HOST = Deno.env.get("HOST") || "0.0.0.0";
const PORT = Deno.env.get("PORT") || "8000";
server.listen(Number(PORT), HOST, () => {

240
server-deno/models/grok.ts Normal file
View file

@ -0,0 +1,240 @@
import { Buffer } from 'node:buffer';
import type { RawData } from 'npm:@types/ws';
import { WebSocket } from 'npm:ws';
import { addConversation, getDeviceInfo } from '../supabase.ts';
import { encoder, FRAME_SIZE, isDev, xaiApiKey } from '../utils.ts';
const XAI_REALTIME_URL = 'wss://api.x.ai/v1/realtime';
const DEFAULT_GROK_VOICE = 'Ara';
/**
 * Bridges a device WebSocket (`ws`) to the xAI Grok realtime WebSocket.
 *
 * Device -> Grok: binary frames are base64-encoded and appended to Grok's input
 * audio buffer; JSON `instruction` messages (`end_of_speech`, `INTERRUPT`) are
 * translated into buffer commit / clear and `response.create` events.
 *
 * Grok -> device: audio deltas are re-chunked into `FRAME_SIZE`-byte frames,
 * passed through the shared `encoder` and sent to the device; lifecycle events
 * are relayed as `{ type: 'server', msg: ... }` JSON messages; transcripts are
 * stored through `addConversation`.
 *
 * @param ws - WebSocket connected to the device.
 * @param payload - Connection payload; `user` and `supabase` are read from it.
 * @param connectionPcmFile - Optional file that receives the raw device audio
 *   when `isDev` is set; closed when the device socket closes.
 * @param firstMessage - If non-empty, sent as an initial user text message
 *   followed by `response.create` as soon as the Grok socket opens.
 * @param systemPrompt - Sent as the session `instructions`.
 * @returns A promise that resolves once the Grok socket is open and rejects on
 *   a socket error or if it has not opened within 10 seconds.
 * @throws Error if `XAI_API_KEY` is not configured.
 */
export const connectToGrok = async (
  ws: WebSocket,
  payload: IPayload,
  connectionPcmFile: Deno.FsFile | null,
  firstMessage: string,
  systemPrompt: string,
) => {
  const { user, supabase } = payload;

  if (!xaiApiKey) {
    throw new Error('XAI_API_KEY is not set');
  }

  const voice = user.personality?.oai_voice ?? DEFAULT_GROK_VOICE;

  const grokWs = new WebSocket(XAI_REALTIME_URL, {
    headers: {
      Authorization: `Bearer ${xaiApiKey}`,
      'Content-Type': 'application/json',
    },
  });

  let isConnected = false;
  // Device messages received before the Grok socket is open. The binary flag is
  // stored with each message so that queued JSON instructions are not replayed
  // as audio once the socket opens.
  const messageQueue: { data: RawData; isBinary: boolean }[] = [];
  // True once RESPONSE.CREATED has been relayed for the current response.
  let createdSent = false;
  let outputTranscript = '';
  // Audio bytes received from Grok that do not yet fill a whole frame.
  let audioRemainder = Buffer.alloc(0);

  // Tells the device a response has started, including the current device
  // volume when it can be fetched; falls back to a bare notification otherwise.
  const sendResponseCreated = async () => {
    try {
      const device = await getDeviceInfo(supabase, user.user_id);
      ws.send(
        JSON.stringify({
          type: 'server',
          msg: 'RESPONSE.CREATED',
          volume_control: device?.volume ?? 100,
        }),
      );
    } catch {
      ws.send(JSON.stringify({ type: 'server', msg: 'RESPONSE.CREATED' }));
    }
  };

  // Seeds the conversation with `firstMessage` (if any) and asks for a reply.
  const sendFirstMessage = () => {
    if (!firstMessage) return;
    grokWs.send(
      JSON.stringify({
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: 'user',
          content: [{ type: 'input_text', text: firstMessage }],
        },
      }),
    );
    grokWs.send(JSON.stringify({ type: 'response.create' }));
  };

  // Translates one device message into the corresponding Grok event(s).
  const messageHandler = async (data: RawData, isBinary: boolean) => {
    if (isBinary) {
      const base64Data = (data as Buffer).toString('base64');
      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: base64Data }));

      if (isDev && connectionPcmFile) {
        await connectionPcmFile.write(data as Buffer);
      }
      return;
    }

    let message: { type?: unknown; msg?: unknown } | null;
    try {
      message = JSON.parse((data as Buffer).toString('utf-8'));
    } catch {
      // Not JSON: ignore the message.
      return;
    }

    if (message?.type !== 'instruction') return;

    if (message.msg === 'end_of_speech') {
      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));
      grokWs.send(JSON.stringify({ type: 'response.create' }));
      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.clear' }));
    } else if (message.msg === 'INTERRUPT') {
      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.clear' }));
    }
  };

  // Runs `messageHandler` without awaiting it, but makes sure a failure (for
  // example a rejected PCM file write) is logged instead of surfacing as an
  // unhandled promise rejection.
  const forwardDeviceMessage = (data: RawData, isBinary: boolean) => {
    messageHandler(data, isBinary).catch((err: unknown) => {
      console.error('Error forwarding device message to Grok:', err);
    });
  };

  grokWs.on('open', () => {
    isConnected = true;

    grokWs.send(
      JSON.stringify({
        type: 'session.update',
        session: {
          voice,
          instructions: systemPrompt,
          turn_detection: { type: "server_vad" },
          audio: {
            input: { format: { type: 'audio/pcm', rate: 16000 } },
            output: { format: { type: 'audio/pcm', rate: 24000 } },
          },
        },
      }),
    );

    sendFirstMessage();

    // Replay anything the device sent while the socket was still connecting,
    // in arrival order and with each message's original binary flag.
    for (const queued of messageQueue.splice(0)) {
      forwardDeviceMessage(queued.data, queued.isBinary);
    }
  });

  grokWs.on('message', async (data: Buffer) => {
    let event: { type?: string; delta?: unknown; transcript?: unknown };
    try {
      event = JSON.parse(data.toString('utf-8'));
    } catch {
      // Not JSON: ignore the message.
      return;
    }

    try {
      switch (event.type) {
        case 'response.created':
          if (!createdSent) {
            await sendResponseCreated();
            createdSent = true;
          }
          break;

        case 'response.output_audio_transcript.delta':
          if (typeof event.delta === 'string') {
            outputTranscript += event.delta;
          }
          break;

        case 'response.output_audio.delta':
          if (typeof event.delta === 'string') {
            const pcmChunk = Buffer.from(event.delta, 'base64');
            audioRemainder = Buffer.concat([audioRemainder, pcmChunk]);

            // Emit every complete frame; the tail stays buffered for the next delta.
            while (audioRemainder.length >= FRAME_SIZE) {
              const frame = audioRemainder.subarray(0, FRAME_SIZE);
              audioRemainder = audioRemainder.subarray(FRAME_SIZE);

              try {
                const packet = encoder.encode(frame);
                ws.send(packet);
              } catch {
                // Best effort: a frame that fails to encode or send is skipped.
              }
            }
          }
          break;

        case 'conversation.item.input_audio_transcription.completed':
          if (typeof event.transcript === 'string' && event.transcript.length > 0) {
            await addConversation(supabase, 'user', event.transcript, user);
          }
          break;

        case 'input_audio_buffer.committed':
          ws.send(JSON.stringify({ type: 'server', msg: 'AUDIO.COMMITTED' }));
          break;

        case 'response.done':
          if (outputTranscript) {
            await addConversation(supabase, 'assistant', outputTranscript, user);
            outputTranscript = '';
          }
          ws.send(JSON.stringify({ type: 'server', msg: 'RESPONSE.COMPLETE' }));
          createdSent = false;
          break;

        case 'error':
          ws.send(JSON.stringify({ type: 'server', msg: 'RESPONSE.ERROR' }));
          createdSent = false;
          break;
      }
    } catch (err) {
      console.error('Error processing Grok event:', err);
      ws.send(JSON.stringify({ type: 'server', msg: 'RESPONSE.ERROR' }));
      createdSent = false;
    }
  });

  grokWs.on('close', () => {
    ws.close();
  });

  grokWs.on('error', (error: unknown) => {
    console.error('Grok WebSocket error:', error);
    ws.send(JSON.stringify({ type: 'server', msg: 'RESPONSE.ERROR' }));
  });

  ws.on('message', (data: RawData, isBinary: boolean) => {
    if (!isConnected) {
      messageQueue.push({ data, isBinary });
    } else {
      forwardDeviceMessage(data, isBinary);
    }
  });

  ws.on('error', (error: unknown) => {
    console.error('ESP32 WebSocket error:', error);
    grokWs.close();
  });

  ws.on('close', async (code: number, reason: string) => {
    console.log(`ESP32 WebSocket closed with code ${code}, reason: ${reason}`);
    grokWs.close();
    if (isDev && connectionPcmFile) {
      connectionPcmFile.close();
    }
  });

  return new Promise<void>((resolve, reject) => {
    const timeout = setTimeout(() => {
      reject(new Error('Grok connection timeout'));
      // Close the pending connection so it cannot open later, after the caller
      // has already been told the connection failed.
      grokWs.close();
    }, 10000);

    grokWs.on('open', () => {
      clearTimeout(timeout);
      resolve();
    });

    grokWs.on('error', (error: unknown) => {
      clearTimeout(timeout);
      reject(error);
    });
  });
};

View file

@ -26,7 +26,14 @@ declare global {
user_code: string;
}
type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume";
type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume" | "grok";
/** Voice ids accepted for the "grok" provider. */
type GrokVoice = "Ara" | "Eve" | "Leo" | "Rex" | "Sal";
type GeminiVoice =
| "Zephyr"

View file

@ -28,6 +28,7 @@ export const openaiApiKey = Deno.env.get("OPENAI_API_KEY");
export const geminiApiKey = Deno.env.get("GEMINI_API_KEY");
export const elevenLabsApiKey = Deno.env.get("ELEVENLABS_API_KEY");
export const humeApiKey = Deno.env.get('HUME_API_KEY');
export const xaiApiKey = Deno.env.get('XAI_API_KEY');
export { encoder, FRAME_SIZE };

View file

@ -1,6 +1,6 @@
-- Add provider column to personalities table
ALTER TABLE personalities
ADD COLUMN provider TEXT CHECK (provider IN ('openai', 'gemini', 'elevenlabs', 'hume')) DEFAULT 'openai';
ADD COLUMN provider TEXT CHECK (provider IN ('openai', 'gemini', 'grok', 'elevenlabs', 'hume')) DEFAULT 'openai';
-- Update existing records to have a default provider
UPDATE personalities