From 852a5e0ebfc9e025d1788682a51dcef50a7322b3 Mon Sep 17 00:00:00 2001 From: akdeb Date: Mon, 9 Jun 2025 22:42:15 +0100 Subject: [PATCH] gemini-update --- .../CreateCharacter/BuildDashboard.tsx | 8 +- frontend-nextjs/types/types.d.ts | 37 +- server-deno/deno.json | 3 + server-deno/deno.lock | 607 ++++++++++++++++++ server-deno/google.ts | 388 +++++++++++ server-deno/main.ts | 439 +------------ server-deno/models/gemini.ts | 311 +++++++++ server-deno/models/openai.ts | 405 ++++++++++++ server-deno/utils.ts | 27 + 9 files changed, 1786 insertions(+), 439 deletions(-) create mode 100644 server-deno/google.ts create mode 100644 server-deno/models/gemini.ts create mode 100644 server-deno/models/openai.ts diff --git a/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx b/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx index 175527c..7bf102f 100644 --- a/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx +++ b/frontend-nextjs/app/components/CreateCharacter/BuildDashboard.tsx @@ -8,8 +8,7 @@ import { Button } from "@/components/ui/button"; import { Label } from "@/components/ui/label"; import { Input } from "@/components/ui/input"; import { Textarea } from "@/components/ui/textarea"; -import { ArrowLeft, ArrowRight, Check, Mic, Volume2 } from "lucide-react"; -import Twemoji from "react-twemoji"; +import { ArrowLeft, ArrowRight, Check, Volume2 } from "lucide-react"; import { createPersonality } from "@/db/personalities"; import { v4 as uuidv4 } from 'uuid'; import { toast } from "@/components/ui/use-toast"; @@ -26,6 +25,7 @@ interface SettingsDashboardProps { } const formSchema = z.object({ + provider: z.enum(["openai", "gemini"]), title: z.string().min(2, "Minimum 2 characters").max(50, "Maximum 50 characters"), description: z.string().min(50, "Minimum 50 characters").max(200, "Maximum 200 characters"), prompt: z.string().min(100, "Minimum 100 characters").max(1000, "Maximum 1000 characters"), @@ -49,6 +49,7 @@ const 
SettingsDashboard: React.FC = ({ const [isSubmitting, setIsSubmitting] = useState(false); const [formData, setFormData] = useState({ + provider: 'openai' as ModelProvider, title: '', description: '', prompt: '', @@ -63,10 +64,8 @@ const SettingsDashboard: React.FC = ({ const [touchedFields, setTouchedFields] = useState>({}); - const [formErrors, setFormErrors] = useState>>({}); - const [previewingVoice, setPreviewingVoice] = useState(null); const handleBlur = (field: keyof FormData | 'features') => { @@ -167,6 +166,7 @@ const SettingsDashboard: React.FC = ({ try { const personality = await createPersonality(supabase, selectedUser.user_id, { + provider: formData.provider, title: formData.title, subtitle: "", character_prompt: formData.prompt, diff --git a/frontend-nextjs/types/types.d.ts b/frontend-nextjs/types/types.d.ts index 7808879..f5a27f5 100644 --- a/frontend-nextjs/types/types.d.ts +++ b/frontend-nextjs/types/types.d.ts @@ -98,7 +98,39 @@ declare global { is_sensitive: boolean; } - type TTSModel = "FISH" | "AZURE"; + type ModelProvider = "openai" | "gemini"; + + type GeminiVoice = + | "Zephyr" + | "Puck" + | "Charon" + | "Kore" + | "Fenrir" + | "Leda" + | "Orus" + | "Aoede" + | "Callirrhoe" + | "Autonoe" + | "Enceladus" + | "Iapetus" + | "Umbriel" + | "Algieba" + | "Despina" + | "Erinome" + | "Algenib" + | "Rasalgethi" + | "Laomedeia" + | "Achernar" + | "Alnilam" + | "Schedar" + | "Gacrux" + | "Pulcherrima" + | "Achird" + | "Zubenelgenubi" + | "Vindemiatrix" + | "Sadachbia" + | "Sadaltager" + | "Sulafat"; type OaiVoice = | "ash" @@ -117,7 +149,8 @@ declare global { is_child_voice: boolean; is_story: boolean; key: string; - oai_voice: OaiVoice; + oai_voice: OaiVoice | GeminiVoice; + provider: ModelProvider; title: string; subtitle: string; short_description: string; diff --git a/server-deno/deno.json b/server-deno/deno.json index e134e5a..13b4c3a 100644 --- a/server-deno/deno.json +++ b/server-deno/deno.json @@ -18,6 +18,9 @@ "proseWrap": "preserve" } }, + 
"compilerOptions": { + "lib": ["deno.window", "deno.ns"] + }, "lint": { "files": { "include": ["./**/*.ts"], diff --git a/server-deno/deno.lock b/server-deno/deno.lock index fa9c5f2..3a6b796 100644 --- a/server-deno/deno.lock +++ b/server-deno/deno.lock @@ -4,12 +4,16 @@ "jsr:@supabase/supabase-js@2": "2.48.1", "jsr:@supabase/supabase-js@^2.48.1": "2.48.1", "npm:@evan/opus@^1.0.3": "1.0.3", + "npm:@google/genai@*": "1.2.0_@modelcontextprotocol+sdk@1.12.1__express@5.1.0__zod@3.25.36_zod@3.25.36", "npm:@supabase/auth-js@2.67.3": "2.67.3", "npm:@supabase/functions-js@2.4.4": "2.4.4", "npm:@supabase/node-fetch@2.6.15": "2.6.15", "npm:@supabase/postgrest-js@1.18.1": "1.18.1", "npm:@supabase/realtime-js@2.11.2": "2.11.2", "npm:@supabase/storage-js@2.7.1": "2.7.1", + "npm:@types/node@*": "22.13.0", + "npm:@types/ws@*": "8.5.14", + "npm:wavefile@*": "11.0.0", "npm:ws@*": "8.18.0" }, "jsr": { @@ -29,6 +33,32 @@ "@evan/opus@1.0.3": { "integrity": "sha512-ADfwIad83W1LuiZDNMjDMDNQRsPz8rj5xnDLExhVWTnA5wGJCLntOn12Ir5rxGBqdfo10QhnNVdd2+gXiZ6xCg==" }, + "@google/genai@1.2.0_@modelcontextprotocol+sdk@1.12.1__express@5.1.0__zod@3.25.36_zod@3.25.36": { + "integrity": "sha512-jAYhzG7UrLJxeQr5cfL87O4AcyEu+E7AA7MJDYPrDWI3Hl25EAdx5mA4AuNfSXZO31LnSyrIkEzkmJOAdlPKOA==", + "dependencies": [ + "@modelcontextprotocol/sdk", + "google-auth-library", + "ws", + "zod", + "zod-to-json-schema" + ] + }, + "@modelcontextprotocol/sdk@1.12.1_express@5.1.0_zod@3.25.36": { + "integrity": "sha512-KG1CZhZfWg+u8pxeM/mByJDScJSrjjxLc8fwQqbsS8xCjBmQfMNEBTotYdNanKekepnfRI85GtgQlctLFpcYPw==", + "dependencies": [ + "ajv", + "content-type", + "cors", + "cross-spawn", + "eventsource", + "express", + "express-rate-limit", + "pkce-challenge", + "raw-body", + "zod", + "zod-to-json-schema" + ] + }, "@supabase/auth-js@2.67.3": { "integrity": "sha512-NJDaW8yXs49xMvWVOkSIr8j46jf+tYHV0wHhrwOaLLMZSFO4g6kKAf+MfzQ2RaD06OCUkUHIzctLAxjTgEVpzw==", "dependencies": [ @@ -83,12 +113,570 @@ "@types/node" ] }, + "accepts@2.0.0": { + 
"integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "dependencies": [ + "mime-types", + "negotiator" + ] + }, + "agent-base@7.1.3": { + "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==" + }, + "ajv@6.12.6": { + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": [ + "fast-deep-equal", + "fast-json-stable-stringify", + "json-schema-traverse", + "uri-js" + ] + }, + "base64-js@1.5.1": { + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==" + }, + "bignumber.js@9.3.0": { + "integrity": "sha512-EM7aMFTXbptt/wZdMlBv2t8IViwQL+h6SLHosp8Yf0dqJMTnY6iL32opnAB6kAdL0SZPuvcAzFr31o0c/R3/RA==" + }, + "body-parser@2.2.0": { + "integrity": "sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg==", + "dependencies": [ + "bytes", + "content-type", + "debug", + "http-errors", + "iconv-lite", + "on-finished", + "qs", + "raw-body", + "type-is" + ] + }, + "buffer-equal-constant-time@1.0.1": { + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==" + }, + "bytes@3.1.2": { + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==" + }, + "call-bind-apply-helpers@1.0.2": { + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": [ + "es-errors", + "function-bind" + ] + }, + "call-bound@1.0.4": { + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dependencies": [ + "call-bind-apply-helpers", + "get-intrinsic" + ] + }, + "content-disposition@1.0.0": { + "integrity": 
"sha512-Au9nRL8VNUut/XSzbQA38+M78dzP4D+eqg3gfJHMIHHYa3bg067xj1KxMUWj+VULbiZMowKngFFbKczUrNJ1mg==", + "dependencies": [ + "safe-buffer" + ] + }, + "content-type@1.0.5": { + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==" + }, + "cookie-signature@1.2.2": { + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==" + }, + "cookie@0.7.2": { + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==" + }, + "cors@2.8.5": { + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "dependencies": [ + "object-assign", + "vary" + ] + }, + "cross-spawn@7.0.6": { + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dependencies": [ + "path-key", + "shebang-command", + "which" + ] + }, + "debug@4.4.1": { + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "dependencies": [ + "ms" + ] + }, + "depd@2.0.0": { + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==" + }, + "dunder-proto@1.0.1": { + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": [ + "call-bind-apply-helpers", + "es-errors", + "gopd" + ] + }, + "ecdsa-sig-formatter@1.0.11": { + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dependencies": [ + "safe-buffer" + ] + }, + "ee-first@1.1.1": { + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" + }, + "encodeurl@2.0.0": { + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==" + }, + 
"es-define-property@1.0.1": { + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==" + }, + "es-errors@1.3.0": { + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==" + }, + "es-object-atoms@1.1.1": { + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": [ + "es-errors" + ] + }, + "escape-html@1.0.3": { + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" + }, + "etag@1.8.1": { + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==" + }, + "eventsource-parser@3.0.2": { + "integrity": "sha512-6RxOBZ/cYgd8usLwsEl+EC09Au/9BcmCKYF2/xbml6DNczf7nv0MQb+7BA2F+li6//I+28VNlQR37XfQtcAJuA==" + }, + "eventsource@3.0.7": { + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "dependencies": [ + "eventsource-parser" + ] + }, + "express-rate-limit@7.5.0_express@5.1.0": { + "integrity": "sha512-eB5zbQh5h+VenMPM3fh+nw1YExi5nMr6HUCR62ELSP11huvxm/Uir1H1QEyTkk5QX6A58pX6NmaTMceKZ0Eodg==", + "dependencies": [ + "express" + ] + }, + "express@5.1.0": { + "integrity": "sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA==", + "dependencies": [ + "accepts", + "body-parser", + "content-disposition", + "content-type", + "cookie", + "cookie-signature", + "debug", + "encodeurl", + "escape-html", + "etag", + "finalhandler", + "fresh", + "http-errors", + "merge-descriptors", + "mime-types", + "on-finished", + "once", + "parseurl", + "proxy-addr", + "qs", + "range-parser", + "router", + "send", + "serve-static", + "statuses", + "type-is", + "vary" + ] + }, + "extend@3.0.2": { + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + 
}, + "fast-deep-equal@3.1.3": { + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "fast-json-stable-stringify@2.1.0": { + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" + }, + "finalhandler@2.1.0": { + "integrity": "sha512-/t88Ty3d5JWQbWYgaOGCCYfXRwV1+be02WqYYlL6h0lEiUAMPM8o8qKGO01YIkOHzka2up08wvgYD0mDiI+q3Q==", + "dependencies": [ + "debug", + "encodeurl", + "escape-html", + "on-finished", + "parseurl", + "statuses" + ] + }, + "forwarded@0.2.0": { + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==" + }, + "fresh@2.0.0": { + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==" + }, + "function-bind@1.1.2": { + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==" + }, + "gaxios@6.7.1": { + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "dependencies": [ + "extend", + "https-proxy-agent", + "is-stream", + "node-fetch", + "uuid" + ] + }, + "gcp-metadata@6.1.1": { + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "dependencies": [ + "gaxios", + "google-logging-utils", + "json-bigint" + ] + }, + "get-intrinsic@1.3.0": { + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": [ + "call-bind-apply-helpers", + "es-define-property", + "es-errors", + "es-object-atoms", + "function-bind", + "get-proto", + "gopd", + "has-symbols", + "hasown", + "math-intrinsics" + ] + }, + "get-proto@1.0.1": { + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": [ + "dunder-proto", + "es-object-atoms" + ] 
+ }, + "google-auth-library@9.15.1": { + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "dependencies": [ + "base64-js", + "ecdsa-sig-formatter", + "gaxios", + "gcp-metadata", + "gtoken", + "jws" + ] + }, + "google-logging-utils@0.0.2": { + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==" + }, + "gopd@1.2.0": { + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==" + }, + "gtoken@7.1.0": { + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "dependencies": [ + "gaxios", + "jws" + ] + }, + "has-symbols@1.1.0": { + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==" + }, + "hasown@2.0.2": { + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dependencies": [ + "function-bind" + ] + }, + "http-errors@2.0.0": { + "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", + "dependencies": [ + "depd", + "inherits", + "setprototypeof", + "statuses", + "toidentifier" + ] + }, + "https-proxy-agent@7.0.6": { + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dependencies": [ + "agent-base", + "debug" + ] + }, + "iconv-lite@0.6.3": { + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": [ + "safer-buffer" + ] + }, + "inherits@2.0.4": { + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "ipaddr.js@1.9.1": { + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==" + }, + "is-promise@4.0.0": { 
+ "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==" + }, + "is-stream@2.0.1": { + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==" + }, + "isexe@2.0.0": { + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" + }, + "json-bigint@1.0.0": { + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "dependencies": [ + "bignumber.js" + ] + }, + "json-schema-traverse@0.4.1": { + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, + "jwa@2.0.1": { + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dependencies": [ + "buffer-equal-constant-time", + "ecdsa-sig-formatter", + "safe-buffer" + ] + }, + "jws@4.0.0": { + "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "dependencies": [ + "jwa", + "safe-buffer" + ] + }, + "math-intrinsics@1.1.0": { + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==" + }, + "media-typer@1.1.0": { + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==" + }, + "merge-descriptors@2.0.0": { + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==" + }, + "mime-db@1.54.0": { + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==" + }, + "mime-types@3.0.1": { + "integrity": "sha512-xRc4oEhT6eaBpU1XF7AjpOFD+xQmXNB5OVKwp4tqCuBpHLS/ZbBDrc07mYTDqVMg6PfxUjjNp85O6Cd2Z/5HWA==", + "dependencies": [ + "mime-db" + ] + }, + "ms@2.1.3": { + "integrity": 
"sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "negotiator@1.0.0": { + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==" + }, + "node-fetch@2.7.0": { + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": [ + "whatwg-url" + ] + }, + "object-assign@4.1.1": { + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==" + }, + "object-inspect@1.13.4": { + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==" + }, + "on-finished@2.4.1": { + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "dependencies": [ + "ee-first" + ] + }, + "once@1.4.0": { + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": [ + "wrappy" + ] + }, + "parseurl@1.3.3": { + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==" + }, + "path-key@3.1.1": { + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==" + }, + "path-to-regexp@8.2.0": { + "integrity": "sha512-TdrF7fW9Rphjq4RjrW0Kp2AW0Ahwu9sRGTkS6bvDi0SCwZlEZYmcfDbEsTz8RVk0EHIS/Vd1bv3JhG+1xZuAyQ==" + }, + "pkce-challenge@5.0.0": { + "integrity": "sha512-ueGLflrrnvwB3xuo/uGob5pd5FN7l0MsLf0Z87o/UQmRtwjvfylfc9MurIxRAWywCYTgrvpXBcqjV4OfCYGCIQ==" + }, + "proxy-addr@2.0.7": { + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "dependencies": [ + "forwarded", + "ipaddr.js" + ] + }, + "punycode@2.3.1": { + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==" + }, + "qs@6.14.0": { + 
"integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "dependencies": [ + "side-channel" + ] + }, + "range-parser@1.2.1": { + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==" + }, + "raw-body@3.0.0": { + "integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==", + "dependencies": [ + "bytes", + "http-errors", + "iconv-lite", + "unpipe" + ] + }, + "router@2.2.0": { + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "dependencies": [ + "debug", + "depd", + "is-promise", + "parseurl", + "path-to-regexp" + ] + }, + "safe-buffer@5.2.1": { + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + }, + "safer-buffer@2.1.2": { + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "send@1.2.0": { + "integrity": "sha512-uaW0WwXKpL9blXE2o0bRhoL2EGXIrZxQ2ZQ4mgcfoBxdFmQold+qWsD2jLrfZ0trjKL6vOw0j//eAwcALFjKSw==", + "dependencies": [ + "debug", + "encodeurl", + "escape-html", + "etag", + "fresh", + "http-errors", + "mime-types", + "ms", + "on-finished", + "range-parser", + "statuses" + ] + }, + "serve-static@2.2.0": { + "integrity": "sha512-61g9pCh0Vnh7IutZjtLGGpTA355+OPn2TyDv/6ivP2h/AdAVX9azsoxmg2/M6nZeQZNYBEwIcsne1mJd9oQItQ==", + "dependencies": [ + "encodeurl", + "escape-html", + "parseurl", + "send" + ] + }, + "setprototypeof@1.2.0": { + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" + }, + "shebang-command@2.0.0": { + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dependencies": [ + "shebang-regex" + ] + }, + "shebang-regex@3.0.0": { + "integrity": 
"sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==" + }, + "side-channel-list@1.0.0": { + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dependencies": [ + "es-errors", + "object-inspect" + ] + }, + "side-channel-map@1.0.1": { + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dependencies": [ + "call-bound", + "es-errors", + "get-intrinsic", + "object-inspect" + ] + }, + "side-channel-weakmap@1.0.2": { + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dependencies": [ + "call-bound", + "es-errors", + "get-intrinsic", + "object-inspect", + "side-channel-map" + ] + }, + "side-channel@1.1.0": { + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dependencies": [ + "es-errors", + "object-inspect", + "side-channel-list", + "side-channel-map", + "side-channel-weakmap" + ] + }, + "statuses@2.0.1": { + "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==" + }, + "toidentifier@1.0.1": { + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==" + }, "tr46@0.0.3": { "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, + "type-is@2.0.1": { + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "dependencies": [ + "content-type", + "media-typer", + "mime-types" + ] + }, "undici-types@6.20.0": { "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==" }, + "unpipe@1.0.0": { + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==" + }, 
+ "uri-js@4.4.1": { + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dependencies": [ + "punycode" + ] + }, + "uuid@9.0.1": { + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "bin": true + }, + "vary@1.1.2": { + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==" + }, + "wavefile@11.0.0": { + "integrity": "sha512-/OBiAALgWU24IG7sC84cDO/KfFuvajWc5Uec0oV2zrpOOZZDgGdOwHwgEzOrwh8jkubBk7PtZfQBIcI1OaE5Ng==", + "bin": true + }, "webidl-conversions@3.0.1": { "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" }, @@ -99,8 +687,27 @@ "webidl-conversions" ] }, + "which@2.0.2": { + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dependencies": [ + "isexe" + ], + "bin": true + }, + "wrappy@1.0.2": { + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, "ws@8.18.0": { "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==" + }, + "zod-to-json-schema@3.24.5_zod@3.25.36": { + "integrity": "sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==", + "dependencies": [ + "zod" + ] + }, + "zod@3.25.36": { + "integrity": "sha512-eRFS3i8T0IrpGdL8HQyqFAugGOn7jOjyGgGdtv5NY4Wkhi7lJDk732bNZ609YMIGFbLoaj6J69O1Mura23gfIw==" } }, "remote": { diff --git a/server-deno/google.ts b/server-deno/google.ts new file mode 100644 index 0000000..6b378fb --- /dev/null +++ b/server-deno/google.ts @@ -0,0 +1,388 @@ +import { Buffer } from "node:buffer"; +import { createServer } from "node:http"; +import { WebSocketServer } from "npm:ws"; +import type { + RawData, + WebSocket as WSWebSocket, + WebSocketServer as _WebSocketServer, +} from 
"npm:@types/ws"; +import { + GoogleGenAI, + LiveServerMessage, + Modality, + Session, +} from "npm:@google/genai"; +import { authenticateUser } from "./utils.ts"; +import { + getChatHistory, + getSupabaseClient, + updateUserSessionTime, +} from "./supabase.ts"; +import { SupabaseClient } from "@supabase/supabase-js"; +import { Encoder } from "@evan/opus"; + +const isDev = Deno.env.get("DEV_MODE") === "True"; + +// Define your audio parameters +const SAMPLE_RATE = 24000; // For example, 24000 Hz +const CHANNELS = 1; // Mono (set to 2 if you have stereo) +const FRAME_DURATION = 120; // Frame length in ms + +const BYTES_PER_SAMPLE = 2; // 16-bit PCM: 2 bytes per sample +const FRAME_SIZE = (SAMPLE_RATE * FRAME_DURATION / 1000) * CHANNELS * + BYTES_PER_SAMPLE; // 5760 bytes (2880 samples) per 120 ms frame at 24000 Hz mono 16-bit + +// Evan's library doesn’t require you to specify frame size here; +// it will automatically handle the frame size based on your PCM input. +// Create a global encoder instance (reuse this for every audio delta) +const encoder = new Encoder({ + channels: CHANNELS, + sample_rate: SAMPLE_RATE, + application: "voip", +}); + +encoder.expert_frame_duration = FRAME_DURATION; +encoder.bitrate = 12000; + +const server = createServer(); + +const wss: _WebSocketServer = new WebSocketServer({ noServer: true }); + +const supabaseUrl = Deno.env.get("SUPABASE_URL"); +const supabaseKey = Deno.env.get("SUPABASE_KEY"); +const geminiApiKey = Deno.env.get("GEMINI_API_KEY"); + +if (!supabaseUrl || !supabaseKey) { + throw new Error("SUPABASE_URL or SUPABASE_KEY is not set"); +} + +wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => { + const { user, supabase } = payload; + + let connectionPcmFile: Deno.FsFile | null = null; + if (isDev) { + const filename = `debug_audio_${Date.now()}.pcm`; + connectionPcmFile = await Deno.open(filename, { + create: true, + write: true, + append: true, + }); + } + + // Send user details to client + ws.send( + JSON.stringify({ + type: "auth", +
volume_control: user.device?.volume ?? 100, + is_ota: user.device?.is_ota ?? false, + is_reset: user.device?.is_reset ?? false, + }), + ); + + const isDoctor = user.user_info.user_type === "doctor"; + const chatHistory = await getChatHistory( + supabase, + user.user_id, + user.personality?.key ?? null, + isDoctor, + ); + // const firstMessage = createFirstMessage(chatHistory, payload); + // const systemPrompt = createSystemPrompt(chatHistory, payload); + let sessionStartTime: number; + + console.log(`Connecting with Gemini key "${geminiApiKey.slice(0, 3)}..."`); + + // Initialize Google GenAI + const ai = new GoogleGenAI({ apiKey: geminiApiKey }); + const model = "gemini-2.5-flash-preview-native-audio-dialog"; + const config = { + responseModalities: [Modality.AUDIO], + systemInstruction: "You are a surfer bro talking to Kai Lenny", + }; + + // Response queue for handling Google's callback-based responses + const responseQueue: LiveServerMessage[] = []; + let geminiSession: Session | null = null; + + async function waitMessage() { + let done = false; + let message: LiveServerMessage | undefined = undefined; + while (!done) { + message = responseQueue.shift(); + if (message) { + done = true; + } else { + await new Promise((resolve) => setTimeout(resolve, 10)); + } + } + return message; + } + + async function handleTurn() { + const turns: any[] = []; + let done = false; + while (!done) { + const message = await waitMessage(); + turns.push(message); + // if ( + // message.serverContent && + // message.serverContent.generationComplete + // ) { + + // } + if ( + message.serverContent && + message.serverContent.generationComplete + ) { + ws.send(JSON.stringify({ + type: "server", + msg: "RESPONSE.CREATED", + })); + done = true; + } + } + return turns; + } + + async function processGeminiTurns() { + try { + console.log("Processing Gemini turns"); + while (geminiSession) { + const turns = await handleTurn(); + + console.log("Turns:", turns); + + // Combine all audio data 
from this turn + const combinedAudio = turns.reduce( + (acc: number[], turn: any) => { + if (turn.data) { + const buffer = Buffer.from(turn.data, "base64"); + const intArray = new Int16Array( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength / + Int16Array.BYTES_PER_ELEMENT, + ); + return acc.concat(Array.from(intArray)); + } + return acc; + }, + [], + ); + + if (combinedAudio.length > 0) { + console.log( + "Received complete audio turn, length:", + combinedAudio.length, + ); + + // Convert back to buffer and send to client + const audioBuffer = new Int16Array(combinedAudio); + const buffer = Buffer.from(audioBuffer.buffer); + + // PREVIEW AUDIO + // const wf = new WaveFile(); + // wf.fromScratch(1, SAMPLE_RATE, "16", audioBuffer); + + // const filename = `gemini_response_${Date.now()}.wav`; + // await Deno.writeFile(filename, wf.toBuffer()); + // console.log(`Audio saved as ${filename}`); + + // Send audio in chunks to client + for ( + let offset = 0; + offset < buffer.length; + offset += FRAME_SIZE + ) { + const frame = buffer.subarray( + offset, + offset + FRAME_SIZE, + ); + try { + const encodedPacket = encoder.encode(frame); + ws.send(encodedPacket); + } catch (_e) { + // Skip this frame but continue with others + } + } + } + + // // Handle text responses if any + // for (const turn of turns) { + // if (turn.text) { + // console.log("Received text:", turn.text); + // addConversation(supabase, "assistant", turn.text, user); + // } + // } + + // Send completion signal + ws.send(JSON.stringify({ + type: "server", + msg: "RESPONSE.COMPLETE", + })); + } + } catch (error) { + console.error("Error processing Gemini turns:", error); + } + } + + // Connect to Google Gemini Live + try { + geminiSession = await ai.live.connect({ + model: model, + callbacks: { + onopen: function () { + console.log("Gemini session opened"); + sessionStartTime = Date.now(); + }, + onmessage: function (message: LiveServerMessage) { + console.log("Received message:", message); + 
responseQueue.push(message); + }, + onerror: function (e: any) { + console.error("Gemini error:", e.message); + ws.send( + JSON.stringify({ + type: "server", + msg: "RESPONSE.ERROR", + }), + ); + }, + onclose: function (e: any) { + console.log("Gemini session closed:", e.reason); + }, + }, + config: config, + }); + + console.log("Connected to Gemini successfully!"); + // Send first message if available + const inputTurns = [{ + role: "user", + parts: [{ text: "Hello how are you?" }], + }]; + geminiSession?.sendClientContent({ turns: inputTurns }); + processGeminiTurns(); + } catch (e: unknown) { + console.log(`Error connecting to Gemini: ${e}`); + ws.close(); + return; + } + + ws.on("message", (data: any, isBinary: boolean) => { + try { + if (isBinary) { + // Handle binary audio data from ESP32 + const base64Data = data.toString("base64"); + + if (isDev && connectionPcmFile) { + connectionPcmFile.write(data); + } + + // Send audio to Gemini + geminiSession?.sendRealtimeInput({ + audio: { + data: base64Data, + mimeType: "audio/pcm;rate=24000", // Gemini expects 16kHz but 24kHz is fine + }, + }); + } else { + // Handle text/JSON messages + const message = JSON.parse(data.toString("utf-8")); + + if ( + message.type === "instruction" && + message.msg === "end_of_speech" + ) { + console.log("end_of_speech detected"); + // Gemini handles turn detection automatically, but we can send a signal + ws.send( + JSON.stringify({ + type: "server", + msg: "AUDIO.COMMITTED", + }), + ); + } + + if ( + message.type === "instruction" && + message.msg === "INTERRUPT" + ) { + console.log("interrupt detected"); + // For Gemini, we might need to close and reopen the session or handle differently + // This depends on Gemini's interrupt capabilities + } + } + } catch (e: unknown) { + console.error("Error handling message:", (e as Error).message); + } + }); + + ws.on("error", (error: any) => { + console.error("WebSocket error:", error); + geminiSession?.close(); + }); + + ws.on("close", 
async (code: number, reason: string) => { + console.log(`WebSocket closed with code ${code}, reason: ${reason}`); + if (sessionStartTime) { + const sessionDuration = Math.floor( + (Date.now() - sessionStartTime) / 1000, + ); + await updateUserSessionTime(supabase, user, sessionDuration); + } + geminiSession?.close(); + if (isDev && connectionPcmFile) { + connectionPcmFile.close(); + console.log("Closed debug audio file."); + } + }); +}); + +server.on("upgrade", async (req, socket, head) => { + console.log("upgrade"); + let user: IUser; + let supabase: SupabaseClient; + let authToken: string; + try { + const { authorization: authHeader, "x-wifi-rssi": rssi } = req.headers; + authToken = authHeader?.replace("Bearer ", "") ?? ""; + const wifiStrength = parseInt(rssi as string); // Convert to number + + // You can now use wifiStrength in your code + console.log("WiFi RSSI:", wifiStrength); // Will log something like -50 + + // Remove debug logging + if (!authToken) { + socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n"); + socket.destroy(); + return; + } + + supabase = getSupabaseClient(authToken as string); + user = await authenticateUser(supabase, authToken as string); + console.log(user.email); + } catch (_e: any) { + socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n"); + socket.destroy(); + return; + } + + wss.handleUpgrade(req, socket, head, (ws) => { + wss.emit("connection", ws, { + user, + supabase, + timestamp: new Date().toISOString(), + }); + }); +}); + +if (isDev) { // deno run -A --env-file=.env main.ts + const HOST = Deno.env.get("HOST") || "0.0.0.0"; + const PORT = Deno.env.get("PORT") || "8000"; + server.listen(Number(PORT), HOST, () => { + console.log(`Audio capture server running on ws://${HOST}:${PORT}`); + }); +} else { + server.listen(8080); +} diff --git a/server-deno/main.ts b/server-deno/main.ts index d26264c..d1c350b 100644 --- a/server-deno/main.ts +++ b/server-deno/main.ts @@ -1,449 +1,23 @@ -import { Buffer } from "node:buffer"; import { 
createServer } from "node:http"; import { WebSocketServer } from "npm:ws"; import type { - RawData, WebSocket as WSWebSocket, WebSocketServer as _WebSocketServer, } from "npm:@types/ws"; - -import { RealtimeClient } from "./realtime/client.js"; -import { RealtimeUtils } from "./realtime/utils.js"; import { authenticateUser } from "./utils.ts"; -import { - addConversation, - createFirstMessage, - createSystemPrompt, - getChatHistory, - getDeviceInfo, - getSupabaseClient, - updateUserSessionTime, -} from "./supabase.ts"; +import { getSupabaseClient } from "./supabase.ts"; import { SupabaseClient } from "@supabase/supabase-js"; - -import { Encoder } from "@evan/opus"; - -const isDev = Deno.env.get("DEV_MODE") === "True"; - -// Define your audio parameters -const SAMPLE_RATE = 24000; // For example, 24000 Hz -const CHANNELS = 1; // Mono (set to 2 if you have stereo) -const FRAME_DURATION = 120; // Frame length in ms - -const BYTES_PER_SAMPLE = 2; // 16-bit PCM: 2 bytes per sample -const FRAME_SIZE = (SAMPLE_RATE * FRAME_DURATION / 1000) * CHANNELS * - BYTES_PER_SAMPLE; // 960 bytes for 24000 Hz mono 16-bit - -// Evan's library doesn’t require you to specify frame size here; -// it will automatically handle the frame size based on your PCM input. 
-// Create a global encoder instance (reuse this for every audio delta) -const encoder = new Encoder({ - channels: CHANNELS, - sample_rate: SAMPLE_RATE, - application: "voip", -}); - -encoder.expert_frame_duration = FRAME_DURATION; -encoder.bitrate = 12000; +import { isDev } from "./utils.ts"; +import { connectToOpenAI } from "./models/openai.ts"; +import { connectToGemini } from "./models/gemini.ts"; const server = createServer(); const wss: _WebSocketServer = new WebSocketServer({ noServer: true }); -const sendFirstMessage = (client: RealtimeClient, firstMessage: string) => { - const event = { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "conversation.item.create", - previous_item_id: "root", - item: { - type: "message", - role: "system", - content: [{ - type: "input_text", - text: firstMessage, - }], - }, - }; - - client.realtime.send(event.type, event); - client.realtime.send("response.create", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "response.create", - }); -}; - -const supabaseUrl = Deno.env.get("SUPABASE_URL"); -const supabaseKey = Deno.env.get("SUPABASE_KEY"); -const openaiApiKey = Deno.env.get("OPENAI_API_KEY"); - -if (!supabaseUrl || !supabaseKey) { - throw new Error("SUPABASE_URL or SUPABASE_KEY is not set"); -} - wss.on("connection", async (ws: WSWebSocket, payload: IPayload) => { - const { user, supabase } = payload; - - let connectionPcmFile: Deno.FsFile | null = null; - if (isDev) { - const filename = `debug_audio_${Date.now()}.pcm`; - connectionPcmFile = await Deno.open(filename, { - create: true, - write: true, - append: true, - }); - } - // send user details to client - // when DEV_MODE is true, we send the default values 100, false, false - ws.send( - JSON.stringify({ - type: "auth", - volume_control: user.device?.volume ?? 20, - is_ota: user.device?.is_ota ?? false, - is_reset: user.device?.is_reset ?? false, - pitch_factor: user.personality?.pitch_factor ?? 
1, - }), - ); - - const isDoctor = user.user_info.user_type === "doctor"; - - const chatHistory = await getChatHistory( - supabase, - user.user_id, - user.personality?.key ?? null, - isDoctor, - ); - const firstMessage = createFirstMessage(chatHistory, payload); - console.log("firstMessage", firstMessage); - const systemPrompt = createSystemPrompt(chatHistory, payload); - let sessionStartTime: number; - let currentItemId: string | null = null; - let currentCallId: string | null = null; - - // Instantiate new client - console.log(`Connecting with key "${openaiApiKey.slice(0, 3)}..."`); - const client = new RealtimeClient({ apiKey: openaiApiKey }); - - // ADD TOOL CALLS HERE - client.addTool( - { - type: "function", - name: "end_session", - description: - 'Call this if the user says bye or needs to leave or suggests they want to end the session. (e.g. "I gotta to go", "I have to work", "I have to sleep", "I have to do something else")', - parameters: { - type: "object", - strict: true, - properties: { - reason: { - type: "string", - description: - 'Short reason for ending the session.', - }, - }, - required: ["reason"], - }, - }, - (args: any) => { - console.log("end session", args); - - // Send your custom message to the client - ws.send(JSON.stringify({ type: "server", msg: "SESSION.END" })); - - // Send the function result back to OpenAI - const functionResult = { - event_id: RealtimeUtils.generateId("evt_"), - type: "conversation.item.create", - item: { - id: RealtimeUtils.generateId("item_"), - type: "function_call_output", - call_id: currentCallId, - output: JSON.stringify({ - success: true, - message: `Session ended: ${args.reason}`, - }), - }, - }; - - client.realtime.send(functionResult.type, functionResult); - - // Return the result for the callback - return { success: true, message: `Session ended: ${args.reason}` }; - }, - ); - - // Relay: OpenAI Realtime API Event -> Browser Event - client.realtime.on("server.*", async (event: any) => { - // 
console.log(`Relaying "${event.type}" to Client`); - // Check if the event is session.created - if (event.type === "session.created") { - console.log("session created", event); - sessionStartTime = Date.now(); - sendFirstMessage(client, firstMessage); - } else if (event.type === "session.updated") { - console.log("session updated", event); - } else if (event.type === "error") { - console.log("error", event); - } else if (event.type === "response.done") { - // Fetch the latest device info when response is complete - try { - const device = await getDeviceInfo(supabase, user.user_id); - - if (device) { - // Send the updated volume data along with the response complete message - ws.send(JSON.stringify({ - type: "server", - msg: "RESPONSE.COMPLETE", - volume_control: device.volume ?? 100, - })); - } else { - // Fall back to just sending the complete message if there's an error - ws.send( - JSON.stringify({ - type: "server", - msg: "RESPONSE.COMPLETE", - }), - ); - } - } catch (error) { - console.error("Error fetching updated device info:", error); - ws.send( - JSON.stringify({ - type: "server", - msg: "RESPONSE.COMPLETE", - }), - ); - } - } else if (event.type === "response.audio_transcript.done") { - console.log("response.audio_transcript.done", event); - await addConversation( - supabase, - "assistant", - event.transcript, - user, - ); - } else if (event.type === "input_audio_buffer.committed") { - ws.send(JSON.stringify({ type: "server", msg: "AUDIO.COMMITTED" })); - } - - if (event.type in client.conversation.EventProcessors) { - try { - switch (event.type) { - case "response.created": - console.log("response.created", event); - ws.send( - JSON.stringify({ - type: "server", - msg: "RESPONSE.CREATED", - }), - ); - break; - case "response.output_item.added": - console.log("response.output_item.added", event); - if (event.item.id) { - console.log("foobar", event.item.id); - currentItemId = event.item.id; - currentCallId = event.item.call_id; - } - break; - case 
"response.audio.delta": - { - const { delta } = client.conversation.processEvent( - event, - ); - try { - if (delta?.audio?.buffer) { - const pcmBuffer = Buffer.from( - delta.audio.buffer, - ); - for ( - let offset = 0; - offset < pcmBuffer.length; - offset += FRAME_SIZE - ) { - // Get one frame of PCM data. - const frame = pcmBuffer.subarray( - offset, - offset + FRAME_SIZE, - ); - - try { - const encodedPacket = encoder - .encode(frame); - ws.send(encodedPacket); - } catch (_e) { - // Skip this frame but continue with others - } - } - } - } catch (audioError) { - console.error( - "Error processing audio delta:", - audioError, - ); - // Don't send any audio data if there's an error at this level - } - } - break; - case "conversation.item.created": - console.log("user said: ", event.item); - break; - case "conversation.item.input_audio_transcription.completed": - console.log("user transcription:", event); - await addConversation( - supabase, - "user", - event.transcript, - user, - ); - break; - } - } catch (error) { - console.error("Error processing event:", error); - console.error("Event that caused the error:", event); - ws.send( - JSON.stringify({ type: "server", msg: "RESPONSE.ERROR" }), - ); - } - } - }); - - client.realtime.on("close", () => ws.close()); - - // Relay: Browser Event -> OpenAI Realtime API Event - // We need to queue data waiting for the OpenAI connection - const messageQueue: RawData[] = []; - - const messageHandler = async (data: any, isBinary: boolean) => { - try { - let event; - - // for esp32 - if (isBinary) { - const base64Data = data.toString("base64"); - - // Convert binary PCM16 data to base64 for OpenAI Realtime API - event = { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "input_audio_buffer.append", - audio: base64Data, - }; - // Write the raw PCM data to file for debugging if enabled. 
- // Also write the base64 data to a separate file - if (isDev) { - if (connectionPcmFile) { - await connectionPcmFile.write(data); - } - } - client.realtime.send(event.type, event); - } else { // Manual VAD - const message = JSON.parse(data.toString("utf-8")); - - // commit user audio and create response - if ( - message.type === "instruction" && - message.msg === "end_of_speech" - ) { - console.log("end_of_speech detected"); - - client.realtime.send("input_audio_buffer.commit", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "input_audio_buffer.commit", - }); - - client.realtime.send("response.create", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "response.create", - }); - - client.realtime.send("input_audio_buffer.clear", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "input_audio_buffer.clear", - }); - } else if ( - message.type === "instruction" && - message.msg === "INTERRUPT" - ) { - console.log("interrupt detected", message); - const audioEndMs = message.audio_end_ms; - - client.realtime.send("conversation.item.truncate", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "conversation.item.truncate", - item_id: currentItemId, - content_index: 0, - audio_end_ms: audioEndMs, - }); - - client.realtime.send("input_audio_buffer.clear", { - event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID - type: "input_audio_buffer.clear", - }); - } - } - } catch (e: unknown) { - console.error((e as Error).message); - console.log(`Error parsing event from client: ${data}`); - } - }; - - ws.on("message", (data: any, isBinary: boolean) => { - if (!client.isConnected()) { - messageQueue.push(data); - } else { - messageHandler(data, isBinary); - } - }); - - // Add error handler - ws.on("error", (error: any) => { - console.error("WebSocket error:", error); - client.disconnect(); - }); - - // Add more detailed close handling - ws.on("close", async 
(code: number, reason: string) => { - console.log(`WebSocket closed with code ${code}, reason: ${reason}`); - if (sessionStartTime) { - const sessionDuration = Math.floor( - (Date.now() - sessionStartTime) / 1000, - ); - await updateUserSessionTime(supabase, user, sessionDuration); - } - client.disconnect(); - if (isDev) { - if (connectionPcmFile) { - connectionPcmFile.close(); - console.log(`Closed debug audio file.`); - } - } - }); - - // Connect to the OpenAI Realtime API - try { - console.log(`Connecting to OpenAI...`); - const sessionOptions = { - model: "gpt-4o-mini-realtime-preview-2024-12-17", - // turn_detection: null, - turn_detection: { - type: "server_vad", - threshold: 0.4, - prefix_padding_ms: 400, - silence_duration_ms: 1000, - }, - voice: user.personality?.oai_voice ?? "ash", - instructions: systemPrompt, - input_audio_transcription: { model: "whisper-1" }, - }; - await client.connect(sessionOptions as any); - } catch (e: unknown) { - console.log(`Error connecting to OpenAI: ${e as Error}`); - ws.close(); - return; - } - console.log(`Connected to OpenAI successfully!`); - while (messageQueue.length) { - messageHandler(messageQueue.shift(), false); - } + // await connectToOpenAI(ws, payload); + await connectToGemini(ws, payload); }); server.on("upgrade", async (req, socket, head) => { @@ -468,7 +42,6 @@ server.on("upgrade", async (req, socket, head) => { supabase = getSupabaseClient(authToken as string); user = await authenticateUser(supabase, authToken as string); - console.log(user.email); } catch (_e: any) { socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n"); socket.destroy(); diff --git a/server-deno/models/gemini.ts b/server-deno/models/gemini.ts new file mode 100644 index 0000000..6f28cc8 --- /dev/null +++ b/server-deno/models/gemini.ts @@ -0,0 +1,311 @@ +import { Buffer } from "node:buffer"; +import type { WebSocketServer as _WebSocketServer } from "npm:@types/ws"; +import { + GoogleGenAI, + LiveConnectConfig, + LiveServerMessage, + Modality, + 
Session, +} from "npm:@google/genai"; +import { getChatHistory, updateUserSessionTime } from "../supabase.ts"; +import { + encoder, + FRAME_SIZE, + geminiApiKey, + isDev, + SAMPLE_RATE, +} from "../utils.ts"; +import pkg from "npm:wavefile"; +const { WaveFile } = pkg; + +export const connectToGemini = async (ws: WebSocket, payload: IPayload) => { + const { user, supabase } = payload; + + let connectionPcmFile: Deno.FsFile | null = null; + if (isDev) { + const filename = `debug_audio_${Date.now()}.pcm`; + connectionPcmFile = await Deno.open(filename, { + create: true, + write: true, + append: true, + }); + } + + // Send user details to client + ws.send( + JSON.stringify({ + type: "auth", + volume_control: user.device?.volume ?? 100, + is_ota: user.device?.is_ota ?? false, + is_reset: user.device?.is_reset ?? false, + }), + ); + + const chatHistory = await getChatHistory( + supabase, + user.user_id, + user.personality?.key ?? null, + false, + ); + // const firstMessage = createFirstMessage(chatHistory, payload); + // const systemPrompt = createSystemPrompt(chatHistory, payload); + let sessionStartTime: number; + + console.log(`Connecting with Gemini key "${geminiApiKey.slice(0, 3)}..."`); + + // Initialize Google GenAI + const ai = new GoogleGenAI({ apiKey: geminiApiKey }); + const model = "gemini-2.0-flash-live-001"; + const config: LiveConnectConfig = { + responseModalities: [Modality.AUDIO], + systemInstruction: "You are a surfer bro talking to Kai Lenny", + // generationConfig: { + // speechConfig: { + // voiceConfig: { + // prebuiltVoiceConfig: { + // voiceName: "Zephyr", + // }, + // }, + // }, + // }, + }; + + // Response queue for handling Google's callback-based responses + const responseQueue: LiveServerMessage[] = []; + let geminiSession: Session | null = null; + + async function waitMessage() { + let done = false; + let message: LiveServerMessage | undefined = undefined; + while (!done) { + message = responseQueue.shift(); + if (message) { + done = true; 
+ } else { + await new Promise((resolve) => setTimeout(resolve, 10)); + } + } + return message; + } + + async function handleTurn() { + const turns: any[] = []; + let done = false; + while (!done) { + const message = await waitMessage(); + turns.push(message); + // if ( + // message.serverContent && + // message.serverContent.generationComplete + // ) { + + // } + if ( + message.serverContent && + message.serverContent.generationComplete + ) { + ws.send(JSON.stringify({ + type: "server", + msg: "RESPONSE.CREATED", + })); + done = true; + } + } + return turns; + } + + async function processGeminiTurns() { + try { + console.log("Processing Gemini turns"); + while (geminiSession) { + const turns = await handleTurn(); + + console.log("Turns:", turns); + + // Combine all audio data from this turn + const combinedAudio = turns.reduce( + (acc: number[], turn: any) => { + if (turn.data) { + const buffer = Buffer.from(turn.data, "base64"); + const intArray = new Int16Array( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength / + Int16Array.BYTES_PER_ELEMENT, + ); + return acc.concat(Array.from(intArray)); + } + return acc; + }, + [], + ); + + if (combinedAudio.length > 0) { + console.log( + "Received complete audio turn, length:", + combinedAudio.length, + ); + + // Convert back to buffer and send to client + const audioBuffer = new Int16Array(combinedAudio); + const buffer = Buffer.from(audioBuffer.buffer); + + // PREVIEW AUDIO + // const wf = new WaveFile(); + // wf.fromScratch(1, SAMPLE_RATE, "16", audioBuffer); + + // const filename = `gemini_response_${Date.now()}.wav`; + // await Deno.writeFile(filename, wf.toBuffer()); + // console.log(`Audio saved as ${filename}`); + + // SEND TO ESP32 + // Send audio in chunks to client + for ( + let offset = 0; + offset < buffer.length; + offset += FRAME_SIZE + ) { + const frame = buffer.subarray( + offset, + offset + FRAME_SIZE, + ); + try { + const encodedPacket = encoder.encode(frame); + ws.send(encodedPacket); + } catch 
(_e) { + // Skip this frame but continue with others + } + } + } + + // // Handle text responses if any + // for (const turn of turns) { + // if (turn.text) { + // console.log("Received text:", turn.text); + // addConversation(supabase, "assistant", turn.text, user); + // } + // } + + // Send completion signal + ws.send(JSON.stringify({ + type: "server", + msg: "RESPONSE.COMPLETE", + })); + } + } catch (error) { + console.error("Error processing Gemini turns:", error); + } + } + + // Connect to Google Gemini Live + try { + geminiSession = await ai.live.connect({ + model: model, + callbacks: { + onopen: function () { + console.log("Gemini session opened"); + sessionStartTime = Date.now(); + }, + onmessage: function (message: LiveServerMessage) { + console.log("Received message:", message); + responseQueue.push(message); + }, + onerror: function (e: any) { + console.error("Gemini error:", e.message); + ws.send( + JSON.stringify({ + type: "server", + msg: "RESPONSE.ERROR", + }), + ); + }, + onclose: function (e: any) { + console.log("Gemini session closed:", e.reason); + }, + }, + config: config, + }); + + console.log("Connected to Gemini successfully!"); + // Send first message if available + const inputTurns = [{ + role: "user", + parts: [{ text: "Hello how are you?" 
}], + }]; + geminiSession?.sendClientContent({ turns: inputTurns }); + processGeminiTurns(); + } catch (e: unknown) { + console.log(`Error connecting to Gemini: ${e}`); + ws.close(); + return; + } + + ws.on("message", (data: any, isBinary: boolean) => { + try { + if (isBinary) { + // Handle binary audio data from ESP32 + const base64Data = data.toString("base64"); + + if (isDev && connectionPcmFile) { + connectionPcmFile.write(data); + } + + // Send audio to Gemini + geminiSession?.sendRealtimeInput({ + audio: { + data: base64Data, + mimeType: "audio/pcm;rate=24000", // Gemini expects 16kHz but 24kHz is fine + }, + }); + } else { + // Handle text/JSON messages + const message = JSON.parse(data.toString("utf-8")); + + if ( + message.type === "instruction" && + message.msg === "end_of_speech" + ) { + console.log("end_of_speech detected"); + // Gemini handles turn detection automatically, but we can send a signal + ws.send( + JSON.stringify({ + type: "server", + msg: "AUDIO.COMMITTED", + }), + ); + } + + if ( + message.type === "instruction" && + message.msg === "INTERRUPT" + ) { + console.log("interrupt detected"); + // For Gemini, we might need to close and reopen the session or handle differently + // This depends on Gemini's interrupt capabilities + } + } + } catch (e: unknown) { + console.error("Error handling message:", (e as Error).message); + } + }); + + ws.on("error", (error: any) => { + console.error("WebSocket error:", error); + geminiSession?.close(); + }); + + ws.on("close", async (code: number, reason: string) => { + console.log(`WebSocket closed with code ${code}, reason: ${reason}`); + if (sessionStartTime) { + const sessionDuration = Math.floor( + (Date.now() - sessionStartTime) / 1000, + ); + await updateUserSessionTime(supabase, user, sessionDuration); + } + geminiSession?.close(); + if (isDev && connectionPcmFile) { + connectionPcmFile.close(); + console.log("Closed debug audio file."); + } + }); +}; diff --git a/server-deno/models/openai.ts 
b/server-deno/models/openai.ts new file mode 100644 index 0000000..d3b24dd --- /dev/null +++ b/server-deno/models/openai.ts @@ -0,0 +1,405 @@ +import { Buffer } from "node:buffer"; +import type { + RawData, + WebSocket as WSWebSocket, + WebSocketServer as _WebSocketServer, +} from "npm:@types/ws"; + +import { RealtimeClient } from "../realtime/client.js"; +import { RealtimeUtils } from "../realtime/utils.js"; +import { + addConversation, + createFirstMessage, + createSystemPrompt, + getChatHistory, + getDeviceInfo, + updateUserSessionTime, +} from "../supabase.ts"; +import { encoder, FRAME_SIZE, isDev, openaiApiKey } from "../utils.ts"; + +const sendFirstMessage = (client: RealtimeClient, firstMessage: string) => { + const event = { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "conversation.item.create", + previous_item_id: "root", + item: { + type: "message", + role: "system", + content: [{ + type: "input_text", + text: firstMessage, + }], + }, + }; + + client.realtime.send(event.type, event); + client.realtime.send("response.create", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "response.create", + }); +}; + +export const connectToOpenAI = async (ws: WebSocket, payload: IPayload) => { + const { user, supabase } = payload; + + let connectionPcmFile: Deno.FsFile | null = null; + if (isDev) { + const filename = `debug_audio_${Date.now()}.pcm`; + connectionPcmFile = await Deno.open(filename, { + create: true, + write: true, + append: true, + }); + } + // send user details to client + // when DEV_MODE is true, we send the default values 100, false, false + ws.send( + JSON.stringify({ + type: "auth", + volume_control: user.device?.volume ?? 20, + is_ota: user.device?.is_ota ?? false, + is_reset: user.device?.is_reset ?? false, + pitch_factor: user.personality?.pitch_factor ?? 
1, + }), + ); + + const isDoctor = user.user_info.user_type === "doctor"; + + const chatHistory = await getChatHistory( + supabase, + user.user_id, + user.personality?.key ?? null, + isDoctor, + ); + const firstMessage = createFirstMessage(chatHistory, payload); + console.log("firstMessage", firstMessage); + const systemPrompt = createSystemPrompt(chatHistory, payload); + let sessionStartTime: number; + let currentItemId: string | null = null; + let currentCallId: string | null = null; + + // Instantiate new client + console.log(`Connecting with key "${openaiApiKey.slice(0, 3)}..."`); + const client = new RealtimeClient({ apiKey: openaiApiKey }); + + // ADD TOOL CALLS HERE + client.addTool( + { + type: "function", + name: "end_session", + description: + 'Call this if the user says bye or needs to leave or suggests they want to end the session. (e.g. "I gotta to go", "I have to work", "I have to sleep", "I have to do something else")', + parameters: { + type: "object", + strict: true, + properties: { + reason: { + type: "string", + description: "Short reason for ending the session.", + }, + }, + required: ["reason"], + }, + }, + (args: any) => { + console.log("end session", args); + + // Send your custom message to the client + ws.send(JSON.stringify({ type: "server", msg: "SESSION.END" })); + + // Send the function result back to OpenAI + const functionResult = { + event_id: RealtimeUtils.generateId("evt_"), + type: "conversation.item.create", + item: { + id: RealtimeUtils.generateId("item_"), + type: "function_call_output", + call_id: currentCallId, + output: JSON.stringify({ + success: true, + message: `Session ended: ${args.reason}`, + }), + }, + }; + + client.realtime.send(functionResult.type, functionResult); + + // Return the result for the callback + return { success: true, message: `Session ended: ${args.reason}` }; + }, + ); + + // Relay: OpenAI Realtime API Event -> Browser Event + client.realtime.on("server.*", async (event: any) => { + // 
console.log(`Relaying "${event.type}" to Client`); + // Check if the event is session.created + if (event.type === "session.created") { + console.log("session created", event); + sessionStartTime = Date.now(); + sendFirstMessage(client, firstMessage); + } else if (event.type === "session.updated") { + console.log("session updated", event); + } else if (event.type === "error") { + console.log("error", event); + } else if (event.type === "response.done") { + // Fetch the latest device info when response is complete + try { + const device = await getDeviceInfo(supabase, user.user_id); + + if (device) { + // Send the updated volume data along with the response complete message + ws.send(JSON.stringify({ + type: "server", + msg: "RESPONSE.COMPLETE", + volume_control: device.volume ?? 100, + })); + } else { + // Fall back to just sending the complete message if there's an error + ws.send( + JSON.stringify({ + type: "server", + msg: "RESPONSE.COMPLETE", + }), + ); + } + } catch (error) { + console.error("Error fetching updated device info:", error); + ws.send( + JSON.stringify({ + type: "server", + msg: "RESPONSE.COMPLETE", + }), + ); + } + } else if (event.type === "response.audio_transcript.done") { + console.log("response.audio_transcript.done", event); + await addConversation( + supabase, + "assistant", + event.transcript, + user, + ); + } else if (event.type === "input_audio_buffer.committed") { + ws.send(JSON.stringify({ type: "server", msg: "AUDIO.COMMITTED" })); + } + + if (event.type in client.conversation.EventProcessors) { + try { + switch (event.type) { + case "response.created": + console.log("response.created", event); + ws.send( + JSON.stringify({ + type: "server", + msg: "RESPONSE.CREATED", + }), + ); + break; + case "response.output_item.added": + console.log("response.output_item.added", event); + if (event.item.id) { + console.log("foobar", event.item.id); + currentItemId = event.item.id; + currentCallId = event.item.call_id; + } + break; + case 
"response.audio.delta": + { + const { delta } = client.conversation.processEvent( + event, + ); + try { + if (delta?.audio?.buffer) { + const pcmBuffer = Buffer.from( + delta.audio.buffer, + ); + for ( + let offset = 0; + offset < pcmBuffer.length; + offset += FRAME_SIZE + ) { + // Get one frame of PCM data. + const frame = pcmBuffer.subarray( + offset, + offset + FRAME_SIZE, + ); + + try { + const encodedPacket = encoder + .encode(frame); + ws.send(encodedPacket); + } catch (_e) { + // Skip this frame but continue with others + } + } + } + } catch (audioError) { + console.error( + "Error processing audio delta:", + audioError, + ); + // Don't send any audio data if there's an error at this level + } + } + break; + case "conversation.item.created": + console.log("user said: ", event.item); + break; + case "conversation.item.input_audio_transcription.completed": + console.log("user transcription:", event); + await addConversation( + supabase, + "user", + event.transcript, + user, + ); + break; + } + } catch (error) { + console.error("Error processing event:", error); + console.error("Event that caused the error:", event); + ws.send( + JSON.stringify({ type: "server", msg: "RESPONSE.ERROR" }), + ); + } + } + }); + + client.realtime.on("close", () => ws.close()); + + // Relay: Browser Event -> OpenAI Realtime API Event + // We need to queue data waiting for the OpenAI connection + const messageQueue: RawData[] = []; + + const messageHandler = async (data: any, isBinary: boolean) => { + try { + let event; + + // for esp32 + if (isBinary) { + const base64Data = data.toString("base64"); + + // Convert binary PCM16 data to base64 for OpenAI Realtime API + event = { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "input_audio_buffer.append", + audio: base64Data, + }; + // Write the raw PCM data to file for debugging if enabled. 
+ // Also write the base64 data to a separate file + if (isDev) { + if (connectionPcmFile) { + await connectionPcmFile.write(data); + } + } + client.realtime.send(event.type, event); + } else { // Manual VAD + const message = JSON.parse(data.toString("utf-8")); + + // commit user audio and create response + if ( + message.type === "instruction" && + message.msg === "end_of_speech" + ) { + console.log("end_of_speech detected"); + + client.realtime.send("input_audio_buffer.commit", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "input_audio_buffer.commit", + }); + + client.realtime.send("response.create", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "response.create", + }); + + client.realtime.send("input_audio_buffer.clear", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "input_audio_buffer.clear", + }); + } else if ( + message.type === "instruction" && + message.msg === "INTERRUPT" + ) { + console.log("interrupt detected", message); + const audioEndMs = message.audio_end_ms; + + client.realtime.send("conversation.item.truncate", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "conversation.item.truncate", + item_id: currentItemId, + content_index: 0, + audio_end_ms: audioEndMs, + }); + + client.realtime.send("input_audio_buffer.clear", { + event_id: RealtimeUtils.generateId("evt_"), // Generate unique ID + type: "input_audio_buffer.clear", + }); + } + } + } catch (e: unknown) { + console.error((e as Error).message); + console.log(`Error parsing event from client: ${data}`); + } + }; + + ws.on("message", (data: any, isBinary: boolean) => { + if (!client.isConnected()) { + messageQueue.push(data); + } else { + messageHandler(data, isBinary); + } + }); + + // Add error handler + ws.on("error", (error: any) => { + console.error("WebSocket error:", error); + client.disconnect(); + }); + + // Add more detailed close handling + ws.on("close", async 
(code: number, reason: string) => { + console.log(`WebSocket closed with code ${code}, reason: ${reason}`); + if (sessionStartTime) { + const sessionDuration = Math.floor( + (Date.now() - sessionStartTime) / 1000, + ); + await updateUserSessionTime(supabase, user, sessionDuration); + } + client.disconnect(); + if (isDev) { + if (connectionPcmFile) { + connectionPcmFile.close(); + console.log(`Closed debug audio file.`); + } + } + }); + + // Connect to the OpenAI Realtime API + try { + console.log(`Connecting to OpenAI...`); + const sessionOptions = { + model: "gpt-4o-mini-realtime-preview-2024-12-17", + // turn_detection: null, + turn_detection: { + type: "server_vad", + threshold: 0.4, + prefix_padding_ms: 400, + silence_duration_ms: 1000, + }, + voice: user.personality?.oai_voice ?? "ash", + instructions: systemPrompt, + input_audio_transcription: { model: "whisper-1" }, + }; + await client.connect(sessionOptions as any); + } catch (e: unknown) { + console.log(`Error connecting to OpenAI: ${e as Error}`); + ws.close(); + return; + } + console.log(`Connected to OpenAI successfully!`); + while (messageQueue.length) { + messageHandler(messageQueue.shift(), false); + } +}; diff --git a/server-deno/utils.ts b/server-deno/utils.ts index 03ee3ba..d8a0fa1 100644 --- a/server-deno/utils.ts +++ b/server-deno/utils.ts @@ -3,6 +3,33 @@ import { getUserByEmail } from "./supabase.ts"; import { SupabaseClient } from "@supabase/supabase-js"; import crypto from "node:crypto"; import { Buffer } from "node:buffer"; +import { Encoder } from "@evan/opus"; + +export const defaultVolume = 50; + +// Define your audio parameters +export const SAMPLE_RATE = 24000; // For example, 24000 Hz +const CHANNELS = 1; // Mono (set to 2 if you have stereo) +const FRAME_DURATION = 120; // Frame length in ms +const BYTES_PER_SAMPLE = 2; // 16-bit PCM: 2 bytes per sample +const FRAME_SIZE = (SAMPLE_RATE * FRAME_DURATION / 1000) * CHANNELS * + BYTES_PER_SAMPLE; // 5760 bytes per 120 ms frame at 24000 Hz mono 16-bit +
+const encoder = new Encoder({ + channels: CHANNELS, + sample_rate: SAMPLE_RATE, + application: "voip", +}); + +encoder.expert_frame_duration = FRAME_DURATION; +encoder.bitrate = 12000; + +export const openaiApiKey = Deno.env.get("OPENAI_API_KEY"); +export const geminiApiKey = Deno.env.get("GEMINI_API_KEY"); + +export { encoder, FRAME_SIZE }; + +export const isDev = Deno.env.get("DEV_MODE") === "True"; export const authenticateUser = async ( supabaseClient: SupabaseClient,