diff --git a/README.md b/README.md index d003a7c..0366321 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Elato is heading to Kickstarter! After a year of prototyping and shipping early

-[🏠 Homepage](https://elatoai.com/) | [🚀 Kickstarter](https://www.kickstarter.com/projects/elatoai/elato-make-toys-talk-with-ai-voices) +[🚀 Kickstarter](https://www.kickstarter.com/projects/elatoai/elato-make-toys-talk-with-ai-voices) | [🏠 Homepage](https://elatoai.com/)

@@ -45,6 +45,8 @@ Realtime AI Speech powered by **OpenAI Realtime API**, **Eleven Labs AI Agents** +
+
[![Discord](https://img.shields.io/badge/Discord-105_members-5865F2?style=flat&logo=discord&logoColor=white)](https://discord.gg/KJWxDPBRUj) diff --git a/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx b/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx index 55a3481..554e724 100644 --- a/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx +++ b/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx @@ -326,11 +326,11 @@ const SettingsDashboard: React.FC = ({ {/* ElevenLabs Alternative */}
-
+
- +

- Create an ElevenLabs character with custom voice agents + Create a voice clone with Eleven Labs Conversational AI Agents or Hume EVI4

diff --git a/frontend-nextjs/app/components/Nav/LeftNavbarButtons.tsx b/frontend-nextjs/app/components/Nav/LeftNavbarButtons.tsx index 30e3972..bd96185 100644 --- a/frontend-nextjs/app/components/Nav/LeftNavbarButtons.tsx +++ b/frontend-nextjs/app/components/Nav/LeftNavbarButtons.tsx @@ -10,6 +10,7 @@ import { DropdownMenuLabel, } from "@/components/ui/dropdown-menu"; import { usePathname } from "next/navigation"; +import Image from "next/image"; const ICON_SIZE = 22; @@ -44,10 +45,10 @@ export default function LeftNavbarButtons({ user }: LeftNavbarButtonsProps) { Main Website -

+

Elato

- 👾 + Elato Logo
diff --git a/frontend-nextjs/app/layout.tsx b/frontend-nextjs/app/layout.tsx index 79c8aab..d556cfc 100644 --- a/frontend-nextjs/app/layout.tsx +++ b/frontend-nextjs/app/layout.tsx @@ -10,6 +10,7 @@ import { Inter_Tight, Borel, Silkscreen, + Luckiest_Guy, } from "next/font/google"; import "./globals.css"; import { createClient } from "@/utils/supabase/server"; @@ -91,7 +92,14 @@ const silkscreen = Silkscreen({ weight: ["400"], }); -const fonts = `${inter.variable} ${inter_tight.variable} ${baloo2.variable} ${comicNeue.variable} ${quicksand.variable} ${fredoka.variable} ${lora.variable} ${karla.variable} ${borel.variable} ${silkscreen.variable}`; +const luckiestGuy = Luckiest_Guy({ + subsets: ["latin"], + display: "swap", + variable: "--font-luckiest-guy", + weight: ["400"], +}); + +const fonts = `${inter.variable} ${inter_tight.variable} ${baloo2.variable} ${comicNeue.variable} ${quicksand.variable} ${fredoka.variable} ${lora.variable} ${karla.variable} ${borel.variable} ${silkscreen.variable} ${luckiestGuy.variable}`; const defaultUrl = process.env.VERCEL_URL ? 
`https://${process.env.VERCEL_URL}` diff --git a/frontend-nextjs/app/page.tsx b/frontend-nextjs/app/page.tsx index f5347ac..e0f0ff6 100644 --- a/frontend-nextjs/app/page.tsx +++ b/frontend-nextjs/app/page.tsx @@ -1,5 +1,5 @@ import Link from "next/link" -import { ChevronRight, Zap, Star, Home } from "lucide-react" +import { ChevronRight, Zap, Star, Home, ArrowUpRight } from "lucide-react" import { Button } from "@/components/ui/button" import { DEVICE_COST, SUBSCRIPTION_COST } from "@/lib/data"; import { createClient } from "@/utils/supabase/server" @@ -10,7 +10,7 @@ import ProductsSection from "./components/LandingPage/ProductsSection"; import Image from "next/image"; import { fetchGithubStars } from "./actions"; import YoutubeDemo from "./components/LandingPage/YoutubeDemo"; -import { PricingSection } from "./components/LandingPage/PricingSection"; +import { kickstarterLink } from "@/lib/data"; export default async function LandingPage() { const supabase = createClient(); @@ -26,6 +26,18 @@ export default async function LandingPage() {
+

+ Elato Logo +Elato +

+

@@ -80,18 +92,7 @@ export default async function LandingPage() { {stars}

- - {/*
- - - - - - -
*/} - +
@@ -171,37 +172,6 @@ export default async function LandingPage() { {/* Create Character Showcase */} - - {/* Pricing */} -
-
-
-
-

Our Pricing

- -
-
-
-
- {/* FAQ */} - {/*
- -
*/} - - - {/* CTA */} - {/*
-
-

Ready to Bring Your Toys to Life?

-

- Order your Elato device today and watch the magic happen! -

- -

First month subscription FREE, then just ${SUBSCRIPTION_COST}/month (or use your own OpenAI API key)

-
-
*/}
) diff --git a/frontend-nextjs/lib/data.ts b/frontend-nextjs/lib/data.ts index e49055d..6e9f31e 100644 --- a/frontend-nextjs/lib/data.ts +++ b/frontend-nextjs/lib/data.ts @@ -10,6 +10,7 @@ export const tiktokLink = "https://www.tiktok.com/@elatoai"; export const githubPublicLink = "https://github.com/akdeb/ElatoAI"; export const businessDemoLink = "https://calendly.com/akadeb/elato-ai-demo"; export const feedbackFormLink = "https://forms.gle/2QmukEG2FXNwBdee7"; +export const kickstarterLink = "https://www.kickstarter.com/projects/elatoai/elato-make-toys-talk-with-ai-voices"; export const r2Url = "https://pub-cd736d767add4fecafea55c239c28497.r2.dev"; export const r2UrlAudio = "https://pub-5fab8e2596c544cd8dc3e20812be2168.r2.dev"; diff --git a/frontend-nextjs/public/logos/elato.png b/frontend-nextjs/public/logos/elato.png new file mode 100644 index 0000000..6c24158 Binary files /dev/null and b/frontend-nextjs/public/logos/elato.png differ diff --git a/frontend-nextjs/public/logos/ks.png b/frontend-nextjs/public/logos/ks.png new file mode 100644 index 0000000..90facd8 Binary files /dev/null and b/frontend-nextjs/public/logos/ks.png differ diff --git a/frontend-nextjs/tailwind.config.ts b/frontend-nextjs/tailwind.config.ts index fb42b0e..598525f 100644 --- a/frontend-nextjs/tailwind.config.ts +++ b/frontend-nextjs/tailwind.config.ts @@ -116,6 +116,7 @@ const config = { karla: ["var(--font-karla)"], borel: ["var(--font-borel)"], silkscreen: ["var(--font-silkscreen)"], + luckiestGuy: ["var(--font-luckiest-guy)"], }, animation: { "accordion-down": "accordion-down 0.2s ease-out", diff --git a/frontend-nextjs/types/types.d.ts b/frontend-nextjs/types/types.d.ts index a516514..0eb79c6 100644 --- a/frontend-nextjs/types/types.d.ts +++ b/frontend-nextjs/types/types.d.ts @@ -116,7 +116,7 @@ declare global { emoji?: string; }; - type ModelProvider = "openai" | "gemini" | "elevenlabs"; + type ModelProvider = "openai" | "gemini" | "elevenlabs" | "hume"; type GeminiVoice = | 
import { Buffer } from 'node:buffer';
import type { RawData } from 'npm:@types/ws';
import { WebSocket } from 'npm:ws';
import { addConversation, getDeviceInfo } from '../supabase.ts';
import {
  encoder,
  FRAME_SIZE,
  isDev,
  humeApiKey,
  downsamplePcm,
  extractPcmFromWav,
  boostLimitPCM16LEInPlace,
} from '../utils.ts';

/**
 * Bridges a device WebSocket (`ws`) to a Hume EVI chat WebSocket.
 *
 * Device -> Hume: binary frames are base64-encoded and forwarded as
 * `audio_input` messages; frames that arrive before the Hume socket is open
 * are queued (together with their `isBinary` flag) and replayed on open.
 *
 * Hume -> device: `audio_output` payloads are decoded from WAV, resampled
 * 48 kHz -> 24 kHz, boosted, cut into `FRAME_SIZE`-byte frames, encoded with
 * the shared `encoder` and sent to the device. Bytes that do not fill a whole
 * frame are carried over to the next chunk and flushed, zero-padded, when
 * Hume reports `assistant_end`. Control events are relayed as
 * `{ type: 'server', msg: ... }` JSON messages.
 *
 * @param ws                device-side WebSocket
 * @param payload           authenticated user plus Supabase client
 * @param connectionPcmFile optional debug capture file (written only when `isDev`)
 * @param firstMessage      text sent to Hume as the opening `user_input`; skipped when empty
 * @param systemPrompt      forwarded in the initial `session_settings`
 * @param closeHandler      awaited after the device socket closes
 * @returns a promise that resolves once the Hume socket is open and rejects
 *          on a connection error or after 10 seconds without an `open` event
 */
export const connectToHume = ({
  ws,
  payload,
  connectionPcmFile,
  firstMessage,
  systemPrompt,
  closeHandler,
}: {
  ws: WebSocket;
  payload: IPayload;
  connectionPcmFile: Deno.FsFile | null;
  firstMessage: string;
  systemPrompt: string;
  closeHandler: () => Promise<void>;
}): Promise<void> => {
  const { user, supabase } = payload;
  const { personality } = user;

  // Only report whether a key is configured; no part of the secret is logged.
  console.log(`Connecting to Hume (API key ${humeApiKey ? 'present' : 'MISSING'})`);

  // Build Hume WebSocket URL
  const humeEndpoint = 'wss://api.hume.ai/v0/evi/chat';
  const queryParams = new URLSearchParams({
    api_key: humeApiKey!,
    config_id: personality!.voice?.config.config_id,
  });
  const humeWsUrl = `${humeEndpoint}?${queryParams.toString()}`;

  // Log the endpoint without its query string: the key is URL-encoded there,
  // so masking it with a plain string replace is not reliable.
  console.log(`Connecting to Hume WebSocket at: ${humeEndpoint}`);
  const humeWs = new WebSocket(humeWsUrl);

  let isConnected = false;
  let createdSent = false;
  // Device frames received before the Hume socket opened, with their original type.
  const messageQueue: { data: RawData; isBinary: boolean }[] = [];
  // Tail of the previous audio chunk that did not fill a whole encoder frame.
  let pendingPcm: Buffer = Buffer.alloc(0);
  // Audio work runs strictly in arrival order through this chain, because the
  // first chunk of a turn awaits a device lookup before any frame is sent.
  let audioWork: Promise<void> = Promise.resolve();

  /** Sends a `{ type: 'server', msg, ...extra }` control message to the device if its socket is open. */
  const sendToDevice = (msg: string, extra: Record<string, unknown> = {}): void => {
    if (ws.readyState !== WebSocket.OPEN) return;
    ws.send(JSON.stringify({ type: 'server', msg, ...extra }));
  };

  /** Encodes one `FRAME_SIZE`-byte PCM frame and sends the packet to the device. */
  const sendEncodedFrame = (frame: Buffer): void => {
    try {
      const packet = encoder.encode(frame);
      ws.send(packet);
    } catch (err) {
      console.error('Hume Opus encode failed:', err);
    }
  };

  /** Appends a task to the ordered audio chain; failures are logged, never propagated. */
  const enqueueAudioWork = (task: () => Promise<void> | void): void => {
    audioWork = audioWork.then(task).catch((error: unknown) => {
      console.error('Error processing Hume audio output:', error);
    });
  };

  /** Announces the start of a response, including the device volume when it can be looked up. */
  const announceResponseCreated = async (): Promise<void> => {
    try {
      const device = await getDeviceInfo(supabase, user.user_id);
      sendToDevice('RESPONSE.CREATED', device ? { volume_control: device.volume ?? 70 } : {});
    } catch (error) {
      console.error('Error fetching device info:', error);
      sendToDevice('RESPONSE.CREATED');
    }
  };

  /** Decodes one Hume audio chunk and streams every complete frame to the device. */
  const handleAudioOutput = async (audioMsg: HumeAudioOutput): Promise<void> => {
    // Send RESPONSE.CREATED before the first audio chunk of a turn
    if (!createdSent) {
      await announceResponseCreated();
      createdSent = true;
    }

    // The base64 payload is a WAV file, not raw PCM
    const wavBuffer = Buffer.from(audioMsg.data, 'base64');
    const pcmData = extractPcmFromWav(wavBuffer);
    if (!pcmData) {
      console.error('Failed to extract PCM data from WAV');
      return;
    }

    // Downsample from 48kHz to 24kHz to match our system
    const downsampledPcm = downsamplePcm(pcmData, 48000, 24000);
    boostLimitPCM16LEInPlace(downsampledPcm, /*gainDb=*/ 6.0, /*ceiling=*/ 0.89);

    // Prepend whatever the previous chunk left over so no samples are lost
    let audioBuffer =
      pendingPcm.length > 0 ? Buffer.concat([pendingPcm, downsampledPcm]) : downsampledPcm;

    while (audioBuffer.length >= FRAME_SIZE) {
      sendEncodedFrame(audioBuffer.subarray(0, FRAME_SIZE));
      audioBuffer = audioBuffer.subarray(FRAME_SIZE);
    }

    // Copy the tail so the (larger) chunk buffer can be released
    pendingPcm = Buffer.from(audioBuffer);
  };

  /** Flushes the carried-over tail as a zero-padded frame and reports the turn as complete. */
  const finishAssistantTurn = (): void => {
    if (pendingPcm.length > 0) {
      const lastFrame = Buffer.alloc(FRAME_SIZE); // zero-filled, i.e. padded with silence
      pendingPcm.copy(lastFrame);
      sendEncodedFrame(lastFrame);
      pendingPcm = Buffer.alloc(0);
    }

    sendToDevice('RESPONSE.COMPLETE');

    // Reset for next turn
    createdSent = false;
  };

  // Handle messages from ESP32 client
  const messageHandler = async (data: RawData, isBinary: boolean): Promise<void> => {
    try {
      if (!isBinary) return;

      // Handle audio data from ESP32
      const audioMessage: HumeAudioInput = {
        type: 'audio_input',
        data: data.toString('base64'),
      };

      if (isConnected) {
        humeWs.send(JSON.stringify(audioMessage));
      }

      // Write to debug file if enabled
      if (isDev && connectionPcmFile) {
        await connectionPcmFile.write(data as Buffer);
      }
    } catch (error) {
      console.error('Error handling message:', error);
    }
  };

  // Handle Hume WebSocket connection
  humeWs.on('open', () => {
    console.log('✅ Connected to Hume WebSocket API successfully');
    isConnected = true;

    // Configure Hume session settings for input audio format
    // This tells Hume what format we're sending TO them, not what we want back
    humeWs.send(JSON.stringify({
      type: 'session_settings',
      audio: {
        encoding: 'linear16',
        channels: 1,
        sample_rate: 16000,
      },
      system_prompt: systemPrompt,
    }));

    // Send the first message only if one was provided
    if (firstMessage) {
      humeWs.send(JSON.stringify({
        type: 'user_input',
        text: firstMessage,
      }));
    }

    // Replay queued device messages with the type they arrived with
    for (const queued of messageQueue.splice(0)) {
      void messageHandler(queued.data, queued.isBinary);
    }
  });

  // Handle messages from Hume
  humeWs.on('message', async (data: Buffer) => {
    try {
      const message: HumeMessage = JSON.parse(data.toString());
      console.log(`Received from Hume: ${message.type}`);

      switch (message.type) {
        case 'assistant_end':
          // Queued behind pending audio so RESPONSE.COMPLETE follows the last frame
          enqueueAudioWork(finishAssistantTurn);
          break;

        case 'assistant_message': {
          const assistantMsg = message as HumeAssistantMessage;

          // Store conversation in database
          await addConversation(
            supabase,
            'assistant',
            assistantMsg.message.content,
            user,
          );
          break;
        }

        case 'audio_output': {
          const audioMsg = message as HumeAudioOutput;
          enqueueAudioWork(() => handleAudioOutput(audioMsg));
          break;
        }

        case 'chat_metadata':
          console.log('Chat metadata received:', message);
          break;

        case 'user_message':
          console.log('User message acknowledged:', message);
          await addConversation(
            supabase,
            'user',
            message.message.content,
            user,
          );
          break;

        case 'user_input':
          // This is an echo of our own input, we can log it but don't need to store it again
          console.log('User input acknowledged by Hume');
          break;

        case 'error': {
          const errorMsg = message as HumeError;
          console.error(`Hume error: ${errorMsg.code} - ${errorMsg.message}`);
          sendToDevice('RESPONSE.ERROR', { error: errorMsg.message });
          break;
        }

        case 'session_created':
          console.log('Hume session created');
          sendToDevice('SESSION.CREATED');
          break;

        case 'session_ended':
          console.log('Hume session ended');
          sendToDevice('SESSION.END');
          break;

        default:
          console.log(`Unhandled Hume message type: ${message.type}`);
      }
    } catch (error) {
      console.error('Error processing Hume message:', error);
    }
  });

  humeWs.on('close', (code: number, reason: Buffer) => {
    console.log(`Hume WebSocket closed: ${code} - ${reason.toString()}`);
    sendToDevice('SESSION.END');
    isConnected = false;
    ws.close();
  });

  humeWs.on('error', (error: Error) => {
    console.error('Hume WebSocket error:', error);
    console.error('Error details:', {
      message: error.message,
      stack: error.stack,
      name: error.name,
    });
    sendToDevice('RESPONSE.ERROR', { error: 'Connection to Hume failed' });
  });

  // Set up ESP32 WebSocket handlers
  ws.on('message', (data: RawData, isBinary: boolean) => {
    if (!isConnected) {
      messageQueue.push({ data, isBinary });
    } else {
      void messageHandler(data, isBinary);
    }
  });

  ws.on('error', (error: Error) => {
    console.error('ESP32 WebSocket error:', error);
    humeWs.close();
  });

  ws.on('close', async (code: number, reason: string) => {
    console.log(`ESP32 WebSocket closed: ${code} - ${reason}`);
    humeWs.close();

    try {
      await closeHandler();
    } finally {
      // Close the debug capture even when closeHandler rejects
      if (isDev && connectionPcmFile) {
        connectionPcmFile.close();
        console.log('Closed debug audio file');
      }
    }
  });

  // Wait for Hume connection to be established
  return new Promise<void>((resolve, reject) => {
    const timeout = setTimeout(() => {
      reject(new Error('Hume connection timeout'));
      // Do not leave the pending connection behind once the caller has given up.
      // NOTE(review): assumes close() on a still-connecting `ws` socket aborts the handshake — confirm.
      humeWs.close();
    }, 10000);

    humeWs.once('open', () => {
      clearTimeout(timeout);
      resolve();
    });

    humeWs.once('error', (error: Error) => {
      clearTimeout(timeout);
      reject(error);
    });
  });
};
/**
 * Applies gain to 16-bit little-endian PCM in place and keeps peaks in range.
 *
 * Pass 1 measures the peak after gain; if it exceeds `ceiling`, every sample
 * is scaled so the peak lands on `ceiling`. Pass 2 applies gain and scale and
 * then the cubic curve `0.5 * y * (3 - y^2)`, hard-clamped to +/-0.999.
 * That curve has a slope of 1.5 near zero, so the written samples can end up
 * above `ceiling` (but never beyond the clamp).
 *
 * Only whole samples are processed: a trailing odd byte is left untouched
 * instead of causing an out-of-bounds read.
 *
 * @param pcmBytes PCM16-LE bytes, modified in place (a Buffer works as well)
 * @param gainDb   gain in decibels applied before limiting
 * @param ceiling  peak target, as a fraction of full scale, used for the rescale step
 */
export function boostLimitPCM16LEInPlace(
  pcmBytes: Uint8Array,
  gainDb = 6.0,
  ceiling = 0.89,
): void {
  const usableBytes = pcmBytes.byteLength - (pcmBytes.byteLength % 2);
  const dv = new DataView(pcmBytes.buffer, pcmBytes.byteOffset, usableBytes);
  const g = Math.pow(10, gainDb / 20);

  // Pass 1: measure post-gain peak
  let peak = 0;
  for (let i = 0; i < usableBytes; i += 2) {
    const a = Math.abs((dv.getInt16(i, true) / 32768) * g);
    if (a > peak) peak = a;
  }
  const scale = peak > ceiling && peak > 0 ? ceiling / peak : 1;

  // Pass 2: apply gain + scale + cubic soft-clip
  for (let i = 0; i < usableBytes; i += 2) {
    let y = (dv.getInt16(i, true) / 32768) * g * scale;
    y = 0.5 * y * (3 - y * y);
    if (y > 0.999) y = 0.999;
    if (y < -0.999) y = -0.999;
    dv.setInt16(i, (y * 32767) | 0, true); // `| 0` truncates toward zero
  }
}

/**
 * Resamples 16-bit little-endian mono PCM by picking the nearest earlier
 * source sample for every output sample (no filtering or interpolation).
 *
 * @param pcmBuffer source PCM; a trailing odd byte is ignored
 * @param fromRate  source sample rate in Hz
 * @param toRate    target sample rate in Hz
 * @returns `pcmBuffer` itself when the rates are equal, otherwise a new buffer
 * @throws RangeError when the rates differ and either is not a positive finite number
 */
export function downsamplePcm(pcmBuffer: Buffer, fromRate: number, toRate: number): Buffer {
  if (fromRate === toRate) {
    return pcmBuffer;
  }
  if (!Number.isFinite(fromRate) || !Number.isFinite(toRate) || fromRate <= 0 || toRate <= 0) {
    throw new RangeError(`Invalid sample rates: ${fromRate} -> ${toRate}`);
  }

  const ratio = fromRate / toRate;
  // Count whole samples only, so every computed source index is readable
  const inputSamples = Math.floor(pcmBuffer.length / 2); // 16-bit = 2 bytes per sample
  const outputSamples = Math.floor(inputSamples / ratio);
  const outputBuffer = Buffer.alloc(outputSamples * 2);

  for (let i = 0; i < outputSamples; i++) {
    const sourceIndex = Math.floor(i * ratio) * 2;
    outputBuffer.writeInt16LE(pcmBuffer.readInt16LE(sourceIndex), i * 2);
  }

  return outputBuffer;
}

/**
 * Returns the payload of the first `data` chunk of a RIFF/WAVE buffer.
 *
 * The returned buffer is a view onto `wavBuffer`, not a copy. The walk over
 * the chunks skips the pad byte that follows an odd-sized chunk, and also
 * accepts a chunk header that ends exactly at the end of the buffer.
 *
 * @param wavBuffer complete WAV file contents
 * @returns the PCM bytes, or `null` (after logging the reason) when the buffer
 *          is shorter than 44 bytes, is not RIFF/WAVE, or has no `data` chunk
 */
export function extractPcmFromWav(wavBuffer: Buffer): Buffer | null {
  try {
    // Check minimum WAV header size
    if (wavBuffer.length < 44) {
      console.error('WAV file too small');
      return null;
    }

    // Verify RIFF header
    if (wavBuffer.toString('ascii', 0, 4) !== 'RIFF') {
      console.error('Not a RIFF file');
      return null;
    }

    // Verify WAVE format
    if (wavBuffer.toString('ascii', 8, 12) !== 'WAVE') {
      console.error('Not a WAVE file');
      return null;
    }

    // Walk the chunk list until the data chunk shows up
    let offset = 12;
    while (offset + 8 <= wavBuffer.length) {
      const chunkId = wavBuffer.toString('ascii', offset, offset + 4);
      const chunkSize = wavBuffer.readUInt32LE(offset + 4);

      if (chunkId === 'data') {
        // subarray clamps the end, so an oversized declared length is harmless
        return wavBuffer.subarray(offset + 8, offset + 8 + chunkSize);
      }

      // Chunks are word-aligned: an odd-sized chunk is followed by one pad byte
      offset += 8 + chunkSize + (chunkSize % 2);
    }

    console.error('No data chunk found in WAV file');
    return null;
  } catch (error) {
    console.error('Error extracting PCM from WAV:', error);
    return null;
  }
}