cloudflare test

This commit is contained in:
akdeb 2026-04-17 10:45:07 +05:30
parent 7a6a7887fc
commit b8c4ae661c
15 changed files with 4619 additions and 8 deletions

View file

@ -16,6 +16,13 @@ volatile bool sleepRequested = false;
* 2. `DEV_MODE` requires updating the IP addresses to your local network IP
* 3. Without `DEV_MODE` defined, the firmware will use your production servers
*
* VOICE BACKEND SELECTION:
* --------------------------------
* Keep the deployment mode (DEV/PROD/ELATO) separate from the voice backend
* (`VOICE_SERVER_DENO` vs `VOICE_SERVER_CLOUDFLARE`).
* This avoids an extra device-side config lookup while still letting us switch
* websocket backends at build time.
*
* DEV SETUP (find your local IP address using ifconfig):
* - WebSocket: Your local IP (e.g., 192.168.1.100:8000)
* - Backend: Your local IP (e.g., 192.168.1.100:3000)
@ -33,27 +40,45 @@ volatile bool sleepRequested = false;
*/
#ifdef DEV_MODE
const char *ws_server = "192.168.1.121";
const uint16_t ws_port = 8000;
const char *ws_server = "192.168.1.33";
const char *ws_path = "/";
#if defined(VOICE_SERVER_DENO)
const uint16_t ws_port = 8000;
#elif defined(VOICE_SERVER_CLOUDFLARE)
const uint16_t ws_port = 8787;
#endif
// Backend server details
const char *backend_server = "192.168.1.121";
const uint16_t backend_port = 3000;
#elif defined(PROD_MODE)
// PROD
#if defined(VOICE_SERVER_DENO)
const char *ws_server = "<your-edge-server>.deno.dev";
const uint16_t ws_port = 443;
const char *ws_path = "/";
#elif defined(VOICE_SERVER_CLOUDFLARE)
const char *ws_server = "<your-cloudflare-worker>.workers.dev";
const uint16_t ws_port = 443;
const char *ws_path = "/ws/esp32";
#endif
// Backend server details
const char *backend_server = "<your-backend-server-url>"; // like www.facebook.com or facebook.vercel.app
const uint16_t backend_port = 3000;
#elif defined(ELATO_MODE)
// ELATO
#if defined(VOICE_SERVER_DENO)
const char *ws_server = "talkedge.deno.dev";
const uint16_t ws_port = 443;
const char *ws_path = "/";
#elif defined(VOICE_SERVER_CLOUDFLARE)
const char *ws_server = "<your-cloudflare-worker>.workers.dev";
const uint16_t ws_port = 443;
const char *ws_path = "/ws/openai/default";
#endif
// Backend server details
const char *backend_server = "www.elatoai.com"; // like www.facebook.com or facebook.vercel.app
const uint16_t backend_port = 3000;
@ -98,8 +123,8 @@ const char *Vercel_CA_cert = R"EOF(
-----END CERTIFICATE-----
)EOF";
// Deno Edge Functions CA cert
// add the CA cert for your edge server here `ws_server`
// Voice websocket CA cert
// add the CA cert for your selected voice websocket server here `ws_server`
const char *CA_cert = R"EOF(
-----BEGIN CERTIFICATE-----
<YOUR TALKEDGE CERTIFICATE HERE>
@ -144,7 +169,7 @@ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
)EOF";
// talkedge.deno.dev CA cert
// Voice websocket CA cert
const char *CA_cert = R"EOF(
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
@ -179,4 +204,4 @@ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
-----END CERTIFICATE-----
)EOF";
#endif
#endif

View file

@ -16,8 +16,15 @@
// #define PROD_MODE
// #define ELATO_MODE
// ---------- CHOOSE YOUR VOICE SERVER ----------
// Keep this separate from DEV/PROD/ELATO so the deployment mode and the voice backend stay independent.
// Pick one backend for websocket voice traffic.
// ---------- Touch mode ----------
// #define VOICE_SERVER_DENO
#define VOICE_SERVER_CLOUDFLARE
// ---------- CHOOSE YOUR INPUT MODE ----------
// If you want to use the touch sensor to wake up the device, uncomment the following line
// If you want to use the button to wake up the device, comment the following line
#define TOUCH_MODE
@ -101,4 +108,4 @@ void factoryResetDevice();
void resetAuth();
void processSleepRequest();
#endif
#endif

View file

@ -0,0 +1,4 @@
OPENAI_API_KEY=your_openai_api_key_here
JWT_SECRET_KEY=replace_with_your_jwt_secret_key
ELATO_OPENAI_MODEL=gpt-4.1-mini
ELATO_OPENAI_SYSTEM_PROMPT=You are an Elato voice companion. Be concise, playful, and easy to understand when spoken aloud.

3
server-cloudflare/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
node_modules
.dev.vars
.wrangler

View file

@ -0,0 +1,42 @@
# server-cloudflare
Cloudflare Workers + Durable Objects voice backend for Elato.
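This starts with one model path:
- `/ws/openai` (also accepted as `/ws/openai/<session-id>`; the session can alternatively be passed as a `session` query param and falls back to `default` when omitted)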
Under the hood that route is rewritten into a Durable Object agent using Cloudflare Agents SDK and `@cloudflare/voice`.
## Current stack
- STT: `WorkersAIFluxSTT`
- LLM: OpenAI Chat Completions
- TTS: `WorkersAITTS`
## Local setup
1. Install dependencies
```bash
npm install
```
2. Copy `.dev.vars.example` to `.dev.vars` and fill in your keys.
3. Run locally
```bash
npm run dev
```
## Notes
- Browser / Next.js clients should connect with a token query param, for example:
```text
wss://<worker-domain>/ws/openai?token=<jwt>&session=<session-id>
```
- ESP32 clients can keep sending `Authorization: Bearer <token>` headers, but this backend is currently built around Cloudflare Voice's browser-style PCM websocket flow, not the existing Elato ESP32 control protocol.
- For ESP32 parity, we will likely need a Cloudflare-side shim or a separate ESP32-specific route.

View file

@ -0,0 +1,87 @@
import { Agent } from "agents";
import {
WorkersAIFluxSTT,
WorkersAITTS,
withVoice,
type VoiceTurnContext,
} from "@cloudflare/voice";
import { getSystemPrompt } from "../src/prompt";
import type { Env } from "../src/types";
// Base `Agent` class wrapped with `withVoice` from `@cloudflare/voice`; used as the
// superclass of the voice agent exported from this module.
const VoiceAgent = withVoice(Agent);
/** One entry of the `messages` array sent in the OpenAI Chat Completions request body. */
interface OpenAIChatMessage {
/** Who authored the message. */
role: "system" | "user" | "assistant";
/** Plain-text message body. */
content: string;
}
/**
 * Produces the assistant's reply text for one voice turn via OpenAI Chat Completions.
 *
 * The request carries, in order: the system prompt, every prior `user`/`assistant`
 * message from the turn context whose content is a non-empty string, and the new
 * transcript as a final `user` message.
 *
 * @param env - Worker bindings; supplies the API key, optional model override, and prompt.
 * @param transcript - Text of the user's latest utterance.
 * @param context - Turn context providing prior messages and the abort signal for the request.
 * @returns The trimmed reply text, or a fixed fallback sentence when the response has no usable content.
 * @throws Error when OpenAI answers with a non-OK status; the message includes status and response body.
 */
async function generateOpenAIReply(
  env: Env,
  transcript: string,
  context: VoiceTurnContext,
): Promise<string> {
  const systemMessage: OpenAIChatMessage = {
    role: "system",
    content: getSystemPrompt(env),
  };

  // Keep only conversational turns that carry real text.
  const history = context.messages.flatMap((message): OpenAIChatMessage[] => {
    const isChatRole = message.role === "user" || message.role === "assistant";
    const hasText =
      typeof message.content === "string" && message.content.length > 0;
    if (!isChatRole || !hasText) {
      return [];
    }
    return [
      {
        role: message.role as "user" | "assistant",
        content: message.content,
      },
    ];
  });

  const messages: OpenAIChatMessage[] = [
    systemMessage,
    ...history,
    { role: "user", content: transcript },
  ];

  const requestBody = JSON.stringify({
    model: env.ELATO_OPENAI_MODEL || "gpt-4.1-mini",
    messages,
    temperature: 0.7,
  });

  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${env.OPENAI_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: requestBody,
    // Lets the caller cancel the in-flight completion through the turn context.
    signal: context.signal,
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenAI request failed: ${response.status} ${errorText}`);
  }

  const data = (await response.json()) as {
    choices?: Array<{
      message?: {
        content?: string;
      };
    }>;
  };

  const reply = data.choices?.[0]?.message?.content?.trim();
  if (reply) {
    return reply;
  }
  return "I heard you, but I do not have a response yet.";
}
/**
 * Voice agent whose turns are answered by OpenAI Chat Completions.
 *
 * Speech-to-text and text-to-speech both go through the `AI` binding on `env`;
 * the reply text for each turn comes from `generateOpenAIReply`.
 */
export class ElatoOpenAiVoiceAgent extends VoiceAgent<Env> {
// Speech-to-text provider, backed by the `AI` binding.
transcriber = new WorkersAIFluxSTT(this.env.AI);
// Text-to-speech provider, backed by the `AI` binding, with a fixed model and speaker.
tts = new WorkersAITTS(this.env.AI, {
model: "@cf/deepgram/aura-1",
speaker: "asteria",
});
/**
 * Returns the reply text for one turn.
 *
 * @param transcript - Text of the user's latest utterance.
 * @param context - Turn context forwarded unchanged to `generateOpenAIReply`.
 * @returns The reply text produced by `generateOpenAIReply`.
 */
async onTurn(
transcript: string,
context: VoiceTurnContext,
): Promise<string> {
return generateOpenAIReply(this.env, transcript, context);
}
}

4289
server-cloudflare/package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
{
"name": "server-cloudflare",
"private": true,
"type": "module",
"scripts": {
"dev": "wrangler dev --ip 0.0.0.0 --port 8787",
"deploy": "wrangler deploy",
"typecheck": "tsc --noEmit"
},
"dependencies": {
"@cloudflare/voice": "latest",
"agents": "latest",
"jose": "^6.0.0"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20260417.0",
"typescript": "^5.8.3",
"wrangler": "latest"
}
}

View file

@ -0,0 +1,46 @@
import { jwtVerify } from "jose";
import type { Env } from "./types";
/** Subset of JWT claims returned by `verifyAuthToken`; each is present only when the claim is a string. */
export interface AuthPayload {
/** Value of the token's `email` claim, when it is a string. */
email?: string;
/** Value of the token's `sub` claim, when it is a string. */
sub?: string;
}
/**
 * Extracts the caller's auth token from a request.
 *
 * An `Authorization: Bearer <token>` header takes precedence. When no usable
 * bearer token is present, the `token` query parameter is used instead.
 *
 * @param request - Incoming request to inspect.
 * @returns The token string, or `null` when neither source provides one.
 */
export function getAuthToken(request: Request): string | null {
  const authHeader = request.headers.get("authorization");

  // HTTP auth scheme names are case-insensitive, so accept `bearer`, `BEARER`, etc.,
  // and tolerate more than one space between the scheme and the credentials.
  const bearerMatch = authHeader?.match(/^Bearer\s+(\S.*)$/i);
  const headerToken = bearerMatch?.[1]?.trim();
  if (headerToken) {
    return headerToken;
  }

  return new URL(request.url).searchParams.get("token");
}
/**
 * Verifies a JWT against the shared secret in `env.JWT_SECRET_KEY`.
 *
 * @param token - Compact JWT string to verify.
 * @param env - Worker bindings; `JWT_SECRET_KEY` is UTF-8 encoded and used as the verification key.
 * @returns The `email` and `sub` claims, each `undefined` unless the claim is a string.
 * @throws Whatever `jwtVerify` throws when verification fails.
 */
export async function verifyAuthToken(
  token: string,
  env: Env,
): Promise<AuthPayload> {
  const keyBytes = new TextEncoder().encode(env.JWT_SECRET_KEY);
  const verified = await jwtVerify(token, keyBytes);
  const claims = verified.payload;

  // Claims are untyped at runtime; only pass through values that are really strings.
  const asOptionalString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  return {
    email: asOptionalString(claims.email),
    sub: asOptionalString(claims.sub),
  };
}
/**
 * Gate for incoming requests: resolves to nothing when the request carries a
 * verifiable token, otherwise resolves to a `401 Unauthorized` response.
 *
 * @param request - Incoming request whose token is read via `getAuthToken`.
 * @param env - Worker bindings passed through to `verifyAuthToken`.
 * @returns `undefined` when authorized; a 401 `Response` when the token is missing or fails verification.
 */
export async function requireAuthorizedRequest(
  request: Request,
  env: Env,
): Promise<Response | void> {
  const deny = (): Response => new Response("Unauthorized", { status: 401 });

  const token = getAuthToken(request);
  if (token) {
    try {
      await verifyAuthToken(token, env);
      return;
    } catch {
      // Any verification failure is reported as a plain 401 without details.
      return deny();
    }
  }

  return deny();
}

View file

@ -0,0 +1,40 @@
import { routeAgentRequest } from "agents";
import { requireAuthorizedRequest } from "./auth";
import type { Env } from "./types";
export { ElatoOpenAiVoiceAgent } from "../models/openai";
/**
 * Rewrites a `/ws/openai[/<session>]` request onto the agent route
 * `/agents/elato-open-ai-voice-agent/<session>`.
 *
 * The session name is taken from the third path segment, else from the
 * `session` query parameter, else `"default"`. Query string, method, headers,
 * and other request properties are carried over from the original request.
 *
 * @param request - Original incoming request.
 * @returns A new request with only the pathname replaced.
 */
function rewriteOpenAIRequest(request: Request): Request {
  const rewrittenUrl = new URL(request.url);
  const segments = rewrittenUrl.pathname.split("/").filter(Boolean);

  // `URL.pathname` is still percent-encoded, whereas `searchParams.get` returns a
  // decoded value. Decode the path segment so both sources are encoded exactly once
  // below and the same session name always maps to the same agent instance.
  let pathSession = segments[2];
  if (pathSession) {
    try {
      pathSession = decodeURIComponent(pathSession);
    } catch {
      // Malformed escape sequence: keep the raw segment rather than failing the request.
    }
  }

  const sessionName =
    pathSession || rewrittenUrl.searchParams.get("session") || "default";

  rewrittenUrl.pathname = `/agents/elato-open-ai-voice-agent/${encodeURIComponent(sessionName)}`;
  return new Request(rewrittenUrl.toString(), request);
}
/**
 * Worker entry point.
 *
 * - `/healthz` answers with a small JSON status document.
 * - `/ws/openai` and `/ws/openai/...` are rewritten onto the agent route first.
 * - Everything is then offered to `routeAgentRequest`, with the auth check run
 *   before both connections and plain requests; unrouted requests get a 404.
 */
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    const { pathname } = new URL(request.url);

    if (pathname === "/healthz") {
      return Response.json({ ok: true, backend: "cloudflare-voice" });
    }

    const isOpenAIRoute =
      pathname === "/ws/openai" || pathname.startsWith("/ws/openai/");
    const routedRequest = isOpenAIRoute
      ? rewriteOpenAIRequest(request)
      : request;

    // Same auth gate for connection upgrades and ordinary HTTP requests.
    const authorize = async (incomingRequest: Request) =>
      requireAuthorizedRequest(incomingRequest, env);

    const agentResponse = await routeAgentRequest(routedRequest, env, {
      cors: true,
      onBeforeConnect: authorize,
      onBeforeRequest: authorize,
    });

    return agentResponse ?? new Response("Not found", { status: 404 });
  },
};

View file

@ -0,0 +1,8 @@
import type { Env } from "./types";
/** Prompt used when `ELATO_OPENAI_SYSTEM_PROMPT` is unset or blank. */
const DEFAULT_PROMPT =
  "You are an Elato voice companion. Keep responses concise, natural to speak aloud, and friendly for a realtime conversation.";

/**
 * Resolves the system prompt for the OpenAI voice agent.
 *
 * @param env - Worker bindings; `ELATO_OPENAI_SYSTEM_PROMPT` is optional.
 * @returns The configured prompt with surrounding whitespace removed, or the
 *   built-in default when it is missing or whitespace-only.
 */
export function getSystemPrompt(env: Env): string {
  const configured = env.ELATO_OPENAI_SYSTEM_PROMPT?.trim();
  if (configured) {
    return configured;
  }
  return DEFAULT_PROMPT;
}

View file

@ -0,0 +1,8 @@
/** Bindings and variables the Worker reads from its environment. */
export interface Env {
/** AI binding handed to the speech-to-text and text-to-speech providers. */
AI: Ai;
/** Shared secret used to verify incoming JWTs. */
JWT_SECRET_KEY: string;
/** Bearer credential sent with OpenAI Chat Completions requests. */
OPENAI_API_KEY: string;
/** Optional model override; the agent falls back to `gpt-4.1-mini` when unset. */
ELATO_OPENAI_MODEL?: string;
/** Optional system prompt override; a built-in default is used when unset or blank. */
ELATO_OPENAI_SYSTEM_PROMPT?: string;
/** Durable Object namespace binding for the `ElatoOpenAiVoiceAgent` class. */
ElatoOpenAiVoiceAgent: DurableObjectNamespace;
}

View file

@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "Bundler",
"strict": true,
"lib": ["ES2022", "WebWorker"],
"types": ["@cloudflare/workers-types"],
"skipLibCheck": true,
"noEmit": true
},
"include": ["src/**/*.ts", "models/**/*.ts"]
}

View file

@ -0,0 +1,15 @@
name = "elato-cloudflare-voice"
main = "src/index.ts"
compatibility_date = "2026-04-17"
compatibility_flags = ["nodejs_compat"]
[ai]
binding = "AI"
[[durable_objects.bindings]]
name = "ElatoOpenAiVoiceAgent"
class_name = "ElatoOpenAiVoiceAgent"
[[migrations]]
tag = "v1"
new_sqlite_classes = ["ElatoOpenAiVoiceAgent"]

4
server-fastapi/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
.venv/
.uv-cache/
__pycache__/
*.pyc