Everything you need to bring conversational AI to your world
diff --git a/frontend-nextjs/app/components/LandingPage/YoutubeDemo.tsx b/frontend-nextjs/app/components/LandingPage/YoutubeDemo.tsx
index a562643..bb06e16 100644
--- a/frontend-nextjs/app/components/LandingPage/YoutubeDemo.tsx
+++ b/frontend-nextjs/app/components/LandingPage/YoutubeDemo.tsx
@@ -4,15 +4,16 @@ import React from "react";
interface YoutubeDemoProps {
caption: string;
+ youtubeId: string;
}
-export default function YoutubeDemo({ caption }: YoutubeDemoProps) {
+export default function YoutubeDemo({ caption, youtubeId }: YoutubeDemoProps) {
return
VIDEO
-
+
+
+
+
{/* Products Section */}
diff --git a/frontend-nextjs/lib/data.ts b/frontend-nextjs/lib/data.ts
index 6e9f31e..c415875 100644
--- a/frontend-nextjs/lib/data.ts
+++ b/frontend-nextjs/lib/data.ts
@@ -106,6 +106,44 @@ export const openaiVoices: VoiceType[] = [
},
];
+export const grokVoices: VoiceType[] = [ // Voice presets for the "grok" provider; each id is a GrokVoice.
+  {
+    id: "Ara",
+    name: "Ara",
+    description: "Bright",
+    color: "bg-yellow-100",
+    provider: "grok",
+  },
+  {
+    id: "Eve",
+    name: "Eve",
+    description: "Upbeat",
+    color: "bg-orange-100",
+    provider: "grok",
+  },
+  {
+    id: "Leo",
+    name: "Leo",
+    description: "Confident",
+    color: "bg-blue-100",
+    provider: "grok",
+  },
+  {
+    id: "Rex",
+    name: "Rex",
+    description: "Direct",
+    color: "bg-gray-100",
+    provider: "grok",
+  },
+  {
+    id: "Sal",
+    name: "Sal",
+    description: "Warm",
+    color: "bg-green-100",
+    provider: "grok",
+  },
+];
+
export const geminiVoices: VoiceType[] = [
{
id: "Zephyr",
diff --git a/frontend-nextjs/types/types.d.ts b/frontend-nextjs/types/types.d.ts
index 0eb79c6..7436ed9 100644
--- a/frontend-nextjs/types/types.d.ts
+++ b/frontend-nextjs/types/types.d.ts
@@ -114,9 +114,24 @@ declare global {
description: string;
color: string;
emoji?: string;
+ }
+ | {
+ provider: "grok";
+ id: GrokVoice;
+ name: string;
+ description: string;
+ color: string;
+ emoji?: string;
};
- type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume";
+ type ModelProvider = "openai" | "gemini" | "grok" | "elevenlabs" | "hume"; // NOTE(review): same value set as the CHECK constraint on personalities.provider — keep in sync.
+
+ type GrokVoice = // Voice ids allowed as `id` on the "grok" variant of VoiceType.
+ | "Ara"
+ | "Eve"
+ | "Leo"
+ | "Rex"
+ | "Sal";
type GeminiVoice =
| "Zephyr"
@@ -162,8 +177,8 @@ declare global {
// characters <-> personalities table
/**
- * oai_voice is for the name of any voice. both gemini and openai use this.
- * forgot to refactor this, update it for your setup
+ * oai_voice holds the voice name for whichever provider is selected; grok, gemini and openai all use it.
+ * The field name is legacy (it was never refactored); rename it for your own setup if needed.
*/
interface IPersonality {
personality_id?: string;
diff --git a/server-deno/.env.example b/server-deno/.env.example
index ca5b6d0..9d24d0d 100644
--- a/server-deno/.env.example
+++ b/server-deno/.env.example
@@ -9,6 +9,7 @@ ENCRYPTION_KEY=
# Model provider API Keys
OPENAI_API_KEY=
GEMINI_API_KEY=
+XAI_API_KEY=
ELEVENLABS_API_KEY=
HUME_API_KEY=
diff --git a/server-deno/main.ts b/server-deno/main.ts
index 9ddeaa2..6da631d 100644
--- a/server-deno/main.ts
+++ b/server-deno/main.ts
@@ -17,6 +17,7 @@ import { connectToOpenAI } from "./models/openai.ts";
import { connectToGemini } from "./models/gemini.ts";
import { connectToElevenLabs } from "./models/elevenlabs.ts";
import { connectToHume } from "./models/hume.ts";
+import { connectToGrok } from "./models/grok.ts";
const server = createServer();
@@ -58,7 +59,7 @@ wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => {
ws.send(
JSON.stringify({
type: "auth",
- volume_control: user.device?.volume ?? 20,
+ volume_control: user.device?.volume ?? 100,
is_ota: user.device?.is_ota ?? false,
is_reset: user.device?.is_reset ?? false,
pitch_factor: user.personality?.pitch_factor ?? 1,
@@ -84,6 +85,15 @@ wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => {
systemPrompt,
);
break;
+ case "grok":
+ await connectToGrok(
+ ws,
+ payload,
+ connectionPcmFile,
+ firstMessage,
+ systemPrompt,
+ );
+ break;
case "elevenlabs":
const agentId = user.personality?.oai_voice ?? "";
@@ -145,7 +155,7 @@ server.on("upgrade", async (req, socket, head) => {
});
});
-if (isDev) { // deno run -A --env-file=.env main.ts
+if (isDev) { // RUN WITH: deno run -A --env-file=.env main.ts
const HOST = Deno.env.get("HOST") || "0.0.0.0";
const PORT = Deno.env.get("PORT") || "8000";
server.listen(Number(PORT), HOST, () => {
diff --git a/server-deno/models/grok.ts b/server-deno/models/grok.ts
new file mode 100644
index 0000000..c134ab4
--- /dev/null
+++ b/server-deno/models/grok.ts
@@ -0,0 +1,303 @@
+
+import { Buffer } from 'node:buffer';
+import type { RawData } from 'npm:@types/ws';
+import { WebSocket } from 'npm:ws';
+import { addConversation, getDeviceInfo } from '../supabase.ts';
+import { encoder, FRAME_SIZE, isDev, xaiApiKey } from '../utils.ts';
+
+const XAI_REALTIME_URL = 'wss://api.x.ai/v1/realtime';
+const DEFAULT_GROK_VOICE = 'Ara';
+
+/**
+ * Bridges a device WebSocket to an xAI Grok realtime session.
+ *
+ * Opens a socket to XAI_REALTIME_URL, sends a `session.update` (voice,
+ * instructions, server VAD, PCM formats), optionally sends `firstMessage` as
+ * the opening user turn, then relays traffic both ways: binary device frames
+ * go to Grok as base64 `input_audio_buffer.append` events, and Grok audio
+ * deltas are cut into FRAME_SIZE-byte frames, run through `encoder`, and
+ * sent to the device. Transcripts are stored with `addConversation`.
+ *
+ * @param ws - WebSocket connected to the device.
+ * @param payload - Connection payload; `user` and `supabase` are read from it.
+ * @param connectionPcmFile - In dev mode, receives the raw device audio; may be null.
+ * @param firstMessage - Text sent as an initial user message; skipped when empty.
+ * @param systemPrompt - Instructions passed to the Grok session.
+ * @returns A promise that resolves once the Grok socket is open. It rejects
+ *   if XAI_API_KEY is not set, on a socket error, or after a 10 second
+ *   connection timeout.
+ */
+export const connectToGrok = async (
+  ws: WebSocket,
+  payload: IPayload,
+  connectionPcmFile: Deno.FsFile | null,
+  firstMessage: string,
+  systemPrompt: string,
+): Promise<void> => {
+  const { user, supabase } = payload;
+
+  if (!xaiApiKey) {
+    throw new Error('XAI_API_KEY is not set');
+  }
+
+  // `||` rather than `??`: an empty string is not a usable voice id either.
+  const voice = user.personality?.oai_voice || DEFAULT_GROK_VOICE;
+
+  const grokWs = new WebSocket(XAI_REALTIME_URL, {
+    headers: {
+      Authorization: `Bearer ${xaiApiKey}`,
+      'Content-Type': 'application/json',
+    },
+  });
+
+  let isConnected = false;
+  // Device messages received before the Grok socket opens. The binary flag is
+  // kept with each one so queued JSON instructions are not replayed as audio.
+  const messageQueue: { data: RawData; isBinary: boolean }[] = [];
+
+  let createdSent = false;
+  let outputTranscript = '';
+  let audioRemainder = Buffer.alloc(0);
+
+  // Sends a `{ type: 'server', msg }` status message to the device.
+  const sendToDevice = (msg: string) => {
+    ws.send(JSON.stringify({ type: 'server', msg }));
+  };
+
+  // Announces a new response to the device, including the current volume when
+  // the device lookup succeeds.
+  const sendResponseCreated = async () => {
+    try {
+      const device = await getDeviceInfo(supabase, user.user_id);
+      ws.send(
+        JSON.stringify({
+          type: 'server',
+          msg: 'RESPONSE.CREATED',
+          volume_control: device?.volume ?? 100,
+        }),
+      );
+    } catch {
+      sendToDevice('RESPONSE.CREATED');
+    }
+  };
+
+  // Injects `firstMessage` as a user turn and asks Grok to respond to it.
+  const sendFirstMessage = () => {
+    if (!firstMessage) return;
+    grokWs.send(
+      JSON.stringify({
+        type: 'conversation.item.create',
+        item: {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text: firstMessage }],
+        },
+      }),
+    );
+    grokWs.send(JSON.stringify({ type: 'response.create' }));
+  };
+
+  // Encodes one FRAME_SIZE-byte PCM frame and sends it to the device. A frame
+  // that fails to encode or send is skipped.
+  const sendAudioFrame = (frame: Buffer) => {
+    try {
+      ws.send(encoder.encode(frame));
+    } catch {
+      // Skip frame
+    }
+  };
+
+  // Sends the partial frame left over at the end of a response, zero-padded to
+  // FRAME_SIZE, so it is not prepended to the next response's audio.
+  const flushAudioRemainder = () => {
+    if (audioRemainder.length === 0) return;
+    const padded = Buffer.alloc(FRAME_SIZE);
+    audioRemainder.copy(padded);
+    audioRemainder = Buffer.alloc(0);
+    sendAudioFrame(padded);
+  };
+
+  // Forwards one device message to Grok: binary frames as audio, JSON
+  // `instruction` messages as buffer commit/clear commands.
+  const messageHandler = async (data: RawData, isBinary: boolean) => {
+    if (isBinary) {
+      const base64Data = (data as Buffer).toString('base64');
+      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: base64Data }));
+
+      if (isDev && connectionPcmFile) {
+        await connectionPcmFile.write(data as Buffer);
+      }
+      return;
+    }
+
+    let message: any;
+    try {
+      message = JSON.parse((data as Buffer).toString('utf-8'));
+    } catch {
+      return;
+    }
+
+    if (message?.type !== 'instruction') return;
+
+    if (message.msg === 'end_of_speech') {
+      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));
+      grokWs.send(JSON.stringify({ type: 'response.create' }));
+      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.clear' }));
+    } else if (message.msg === 'INTERRUPT') {
+      grokWs.send(JSON.stringify({ type: 'input_audio_buffer.clear' }));
+    }
+  };
+
+  // messageHandler is async; catch here so a failed send or file write cannot
+  // surface as an unhandled promise rejection.
+  const relayDeviceMessage = (data: RawData, isBinary: boolean) => {
+    messageHandler(data, isBinary).catch((err: unknown) => {
+      console.error('Error relaying device message to Grok:', err);
+    });
+  };
+
+  grokWs.on('open', () => {
+    isConnected = true;
+
+    grokWs.send(
+      JSON.stringify({
+        type: 'session.update',
+        session: {
+          voice,
+          instructions: systemPrompt,
+          turn_detection: { type: 'server_vad' },
+          audio: {
+            input: { format: { type: 'audio/pcm', rate: 16000 } },
+            output: { format: { type: 'audio/pcm', rate: 24000 } },
+          },
+        },
+      }),
+    );
+
+    sendFirstMessage();
+
+    // Replay what the device sent while the socket was still connecting.
+    for (const queued of messageQueue.splice(0)) {
+      relayDeviceMessage(queued.data, queued.isBinary);
+    }
+  });
+
+  grokWs.on('message', async (data: Buffer) => {
+    let event: any;
+    try {
+      event = JSON.parse(data.toString('utf-8'));
+    } catch {
+      return;
+    }
+
+    try {
+      // `?.` so a JSON `null` payload is ignored instead of reported as an error.
+      switch (event?.type) {
+        case 'response.created':
+          if (!createdSent) {
+            // Set the flag before awaiting so an event that arrives during the
+            // device lookup cannot trigger a second notification.
+            createdSent = true;
+            await sendResponseCreated();
+          }
+          break;
+
+        case 'response.output_audio_transcript.delta':
+          if (typeof event.delta === 'string') {
+            outputTranscript += event.delta;
+          }
+          break;
+
+        case 'response.output_audio.delta':
+          if (typeof event.delta === 'string') {
+            const pcmChunk = Buffer.from(event.delta, 'base64');
+            audioRemainder = Buffer.concat([audioRemainder, pcmChunk]);
+
+            while (audioRemainder.length >= FRAME_SIZE) {
+              sendAudioFrame(audioRemainder.subarray(0, FRAME_SIZE));
+              audioRemainder = audioRemainder.subarray(FRAME_SIZE);
+            }
+          }
+          break;
+
+        case 'conversation.item.input_audio_transcription.completed':
+          if (typeof event.transcript === 'string' && event.transcript.length > 0) {
+            await addConversation(supabase, 'user', event.transcript, user);
+          }
+          break;
+
+        case 'input_audio_buffer.committed':
+          sendToDevice('AUDIO.COMMITTED');
+          break;
+
+        case 'response.done':
+          flushAudioRemainder();
+          if (outputTranscript) {
+            // Reset before awaiting so deltas of a following response that
+            // arrive during the write are not wiped.
+            const transcript = outputTranscript;
+            outputTranscript = '';
+            await addConversation(supabase, 'assistant', transcript, user);
+          }
+          sendToDevice('RESPONSE.COMPLETE');
+          createdSent = false;
+          break;
+
+        case 'error':
+          sendToDevice('RESPONSE.ERROR');
+          createdSent = false;
+          break;
+      }
+    } catch (err) {
+      console.error('Error processing Grok event:', err);
+      sendToDevice('RESPONSE.ERROR');
+      createdSent = false;
+    }
+  });
+
+  grokWs.on('close', () => {
+    ws.close();
+  });
+
+  grokWs.on('error', (error: unknown) => {
+    console.error('Grok WebSocket error:', error);
+    sendToDevice('RESPONSE.ERROR');
+  });
+
+  ws.on('message', (data: RawData, isBinary: boolean) => {
+    if (!isConnected) {
+      messageQueue.push({ data, isBinary });
+    } else {
+      relayDeviceMessage(data, isBinary);
+    }
+  });
+
+  ws.on('error', (error: unknown) => {
+    console.error('ESP32 WebSocket error:', error);
+    grokWs.close();
+  });
+
+  ws.on('close', (code: number, reason: string) => {
+    console.log(`ESP32 WebSocket closed with code ${code}, reason: ${reason}`);
+    grokWs.close();
+    if (isDev && connectionPcmFile) {
+      connectionPcmFile.close();
+    }
+  });
+
+  return new Promise<void>((resolve, reject) => {
+    const timeout = setTimeout(() => {
+      reject(new Error('Grok connection timeout'));
+      // Abandon the pending handshake so the socket is not left connecting.
+      grokWs.terminate();
+    }, 10000);
+    grokWs.on('open', () => {
+      clearTimeout(timeout);
+      resolve();
+    });
+    grokWs.on('error', (error: unknown) => {
+      clearTimeout(timeout);
+      reject(error);
+    });
+  });
+};
diff --git a/server-deno/types.d.ts b/server-deno/types.d.ts
index 8c3f0c4..28cbce8 100644
--- a/server-deno/types.d.ts
+++ b/server-deno/types.d.ts
@@ -26,7 +26,14 @@ declare global {
user_code: string;
}
- type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume";
+ type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume" | "grok"; // NOTE(review): same value set as the CHECK constraint on personalities.provider — keep in sync.
+
+  type GrokVoice = // Grok voice ids; same members as GrokVoice in frontend-nextjs/types/types.d.ts.
+    | "Ara"
+    | "Eve"
+    | "Leo"
+    | "Rex"
+    | "Sal";
type GeminiVoice =
| "Zephyr"
diff --git a/server-deno/utils.ts b/server-deno/utils.ts
index 0aa62bc..a9a3c7a 100644
--- a/server-deno/utils.ts
+++ b/server-deno/utils.ts
@@ -28,6 +28,7 @@ export const openaiApiKey = Deno.env.get("OPENAI_API_KEY");
export const geminiApiKey = Deno.env.get("GEMINI_API_KEY");
export const elevenLabsApiKey = Deno.env.get("ELEVENLABS_API_KEY");
export const humeApiKey = Deno.env.get('HUME_API_KEY');
+export const xaiApiKey = Deno.env.get('XAI_API_KEY'); // Bearer token for the Grok connector (models/grok.ts), which throws when this is unset.
export { encoder, FRAME_SIZE };
diff --git a/supabase/migrations/20250611011151_add_provider.sql b/supabase/migrations/20250611011151_add_provider.sql
index a568cf9..350638d 100644
--- a/supabase/migrations/20250611011151_add_provider.sql
+++ b/supabase/migrations/20250611011151_add_provider.sql
@@ -1,6 +1,6 @@
-- Add provider column to personalities table
ALTER TABLE personalities
-ADD COLUMN provider TEXT CHECK (provider IN ('openai', 'gemini', 'elevenlabs', 'hume')) DEFAULT 'openai';
+ADD COLUMN provider TEXT CHECK (provider IN ('openai', 'gemini', 'grok', 'elevenlabs', 'hume')) DEFAULT 'openai'; -- NOTE(review): this edits an existing migration; a database that already ran it keeps the old CHECK and would reject 'grok' — confirm whether a follow-up migration is needed.
-- Update existing records to have a default provider
UPDATE personalities