test cloudflare DO
This commit is contained in:
parent
b8c4ae661c
commit
c8748c0535
9 changed files with 487 additions and 82 deletions
|
|
@ -50,7 +50,7 @@ const uint16_t ws_port = 8787;
|
|||
#endif
|
||||
|
||||
// Backend server details
|
||||
const char *backend_server = "192.168.1.121";
|
||||
const char *backend_server = "192.168.1.33";
|
||||
const uint16_t backend_port = 3000;
|
||||
|
||||
#elif defined(PROD_MODE)
|
||||
|
|
@ -60,7 +60,7 @@ const char *ws_server = "<your-edge-server>.deno.dev";
|
|||
const uint16_t ws_port = 443;
|
||||
const char *ws_path = "/";
|
||||
#elif defined(VOICE_SERVER_CLOUDFLARE)
|
||||
const char *ws_server = "<your-cloudflare-worker>.workers.dev";
|
||||
const char *ws_server = "<your-cloudflare-server>.workers.dev";
|
||||
const uint16_t ws_port = 443;
|
||||
const char *ws_path = "/ws/esp32";
|
||||
#endif
|
||||
|
|
@ -75,9 +75,9 @@ const char *ws_server = "talkedge.deno.dev";
|
|||
const uint16_t ws_port = 443;
|
||||
const char *ws_path = "/";
|
||||
#elif defined(VOICE_SERVER_CLOUDFLARE)
|
||||
const char *ws_server = "<your-cloudflare-worker>.workers.dev";
|
||||
const char *ws_server = "elato.akash-b25.workers.dev";
|
||||
const uint16_t ws_port = 443;
|
||||
const char *ws_path = "/ws/openai/default";
|
||||
const char *ws_path = "/ws/esp32";
|
||||
#endif
|
||||
// Backend server details
|
||||
const char *backend_server = "www.elatoai.com"; // like www.facebook.com or facebook.vercel.app
|
||||
|
|
@ -123,8 +123,7 @@ const char *Vercel_CA_cert = R"EOF(
|
|||
-----END CERTIFICATE-----
|
||||
)EOF";
|
||||
|
||||
// Voice websocket CA cert
|
||||
// add the CA cert for your selected voice websocket server here `ws_server`
|
||||
// Voice websocket CA cert for the selected voice websocket server (`ws_server`).
|
||||
const char *CA_cert = R"EOF(
|
||||
-----BEGIN CERTIFICATE-----
|
||||
<YOUR TALKEDGE CERTIFICATE HERE>
|
||||
|
|
@ -169,7 +168,8 @@ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
|||
)EOF";
|
||||
|
||||
|
||||
// Voice websocket CA cert
|
||||
#if defined(VOICE_SERVER_DENO)
|
||||
// Voice websocket CA cert for `talkedge.deno.dev`
|
||||
const char *CA_cert = R"EOF(
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
||||
|
|
@ -203,5 +203,41 @@ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
|||
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
||||
-----END CERTIFICATE-----
|
||||
)EOF";
|
||||
#elif defined(VOICE_SERVER_CLOUDFLARE)
|
||||
// Voice websocket CA cert for `elato.akash-b25.workers.dev`
|
||||
const char *CA_cert = R"EOF(
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
||||
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
||||
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
||||
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
||||
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
||||
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
||||
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
||||
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
||||
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
||||
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
||||
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
||||
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
||||
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
||||
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
||||
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
||||
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
||||
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
||||
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
||||
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
||||
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
||||
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
||||
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
||||
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
||||
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
||||
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
||||
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
||||
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
||||
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
||||
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
||||
-----END CERTIFICATE-----
|
||||
)EOF";
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@
|
|||
// Pick one of the following (DEV_MODE, PROD_MODE, ELATO_MODE) , comment the rest
|
||||
// For ELATO_MODE, you will need to register your DIY Hardware on the Elato website
|
||||
|
||||
#define DEV_MODE
|
||||
// #define PROD_MODE
|
||||
// #define DEV_MODE
|
||||
#define PROD_MODE
|
||||
// #define ELATO_MODE
|
||||
|
||||
// ---------- CHOOSE YOUR VOICE SERVER ----------
|
||||
|
|
|
|||
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
Cloudflare Workers + Durable Objects voice backend for Elato.
|
||||
|
||||
This starts with one model path:
|
||||
This starts with one ESP32-compatible websocket path:
|
||||
|
||||
- `/ws/openai`
|
||||
- `/ws/esp32`
|
||||
|
||||
Under the hood that route is rewritten into a Durable Object agent using Cloudflare Agents SDK and `@cloudflare/voice`.
|
||||
The route is backed by a Durable Object that preserves the Elato device control protocol.
|
||||
|
||||
## Current stack
|
||||
|
||||
- STT: `WorkersAIFluxSTT`
|
||||
- STT: `@cf/openai/whisper`
|
||||
- LLM: OpenAI Chat Completions
|
||||
- TTS: `WorkersAITTS`
|
||||
- TTS: `@cf/deepgram/aura-1`
|
||||
|
||||
## Local setup
|
||||
|
||||
|
|
@ -32,11 +32,15 @@ npm run dev
|
|||
|
||||
## Notes
|
||||
|
||||
- Browser / Next.js clients should connect with a token query param, for example:
|
||||
- ESP32 clients should connect to:
|
||||
|
||||
```text
|
||||
wss://<worker-domain>/ws/openai?token=<jwt>&session=<session-id>
|
||||
wss://<worker-domain>/ws/esp32
|
||||
```
|
||||
|
||||
- ESP32 clients can keep sending `Authorization: Bearer <token>` headers, but this backend is currently built around Cloudflare Voice's browser-style PCM websocket flow, not the existing Elato ESP32 control protocol.
|
||||
- For ESP32 parity, we will likely need a Cloudflare-side shim or a separate ESP32-specific route.
|
||||
- Auth is intentionally left out of this iteration. Add your own auth check in the Worker route before using this in production.
|
||||
- This backend now targets the current Elato ESP32 control protocol first:
|
||||
`auth`, `AUDIO.COMMITTED`, `RESPONSE.CREATED`, binary audio frames, `RESPONSE.COMPLETE`, and `SESSION.END`.
|
||||
- It does not currently use `@cloudflare/voice`; the Durable Object owns the websocket session directly so the firmware protocol stays explicit.
|
||||
- The ESP32 route now packetizes Cloudflare TTS output into Opus frames before sending binary websocket packets, matching the same 24kHz mono / 120ms framing shape used by `server-deno`.
|
||||
- The remaining gap is operational, not transport-level: this prototype still has placeholder auth / DB comments and has not been load-tested against long-running device sessions yet.
|
||||
|
|
|
|||
|
|
@ -1,39 +1,52 @@
|
|||
import { Agent } from "agents";
|
||||
import {
|
||||
WorkersAIFluxSTT,
|
||||
WorkersAITTS,
|
||||
withVoice,
|
||||
type VoiceTurnContext,
|
||||
} from "@cloudflare/voice";
|
||||
|
||||
import { getSystemPrompt } from "../src/prompt";
|
||||
import type { Env } from "../src/types";
|
||||
import { createOpusPacketizer } from "../src/opus";
|
||||
import { getSystemPrompt } from "../src/prompt";
|
||||
|
||||
const VoiceAgent = withVoice(Agent);
|
||||
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
|
||||
|
||||
interface OpenAIChatMessage {
|
||||
role: "system" | "user" | "assistant";
|
||||
content: string;
|
||||
}
|
||||
|
||||
interface SessionState {
|
||||
history: OpenAIChatMessage[];
|
||||
}
|
||||
|
||||
function createAuthMessage() {
|
||||
return {
|
||||
type: "auth",
|
||||
volume_control: 100,
|
||||
pitch_factor: 1,
|
||||
is_ota: false,
|
||||
is_reset: false,
|
||||
};
|
||||
}
|
||||
|
||||
function createServerMessage(msg: string, extra: Record<string, unknown> = {}) {
|
||||
return JSON.stringify({
|
||||
type: "server",
|
||||
msg,
|
||||
...extra,
|
||||
});
|
||||
}
|
||||
|
||||
async function transcribePcm(env: Env, audio: Uint8Array): Promise<string> {
|
||||
const response = await env.AI.run("@cf/openai/whisper", {
|
||||
audio: [...audio],
|
||||
}) as { text?: string };
|
||||
|
||||
return response.text?.trim() || "";
|
||||
}
|
||||
|
||||
async function generateOpenAIReply(
|
||||
env: Env,
|
||||
transcript: string,
|
||||
context: VoiceTurnContext,
|
||||
history: OpenAIChatMessage[],
|
||||
): Promise<string> {
|
||||
const messages: OpenAIChatMessage[] = [
|
||||
{ role: "system", content: getSystemPrompt(env) },
|
||||
...context.messages
|
||||
.filter(
|
||||
(message) =>
|
||||
(message.role === "user" || message.role === "assistant") &&
|
||||
typeof message.content === "string" &&
|
||||
message.content.length > 0,
|
||||
)
|
||||
.map((message) => ({
|
||||
role: message.role as "user" | "assistant",
|
||||
content: message.content,
|
||||
})),
|
||||
...history,
|
||||
{ role: "user", content: transcript },
|
||||
];
|
||||
|
||||
|
|
@ -48,20 +61,14 @@ async function generateOpenAIReply(
|
|||
messages,
|
||||
temperature: 0.7,
|
||||
}),
|
||||
signal: context.signal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`OpenAI request failed: ${response.status} ${errorText}`);
|
||||
throw new Error(`OpenAI request failed: ${response.status} ${await response.text()}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as {
|
||||
choices?: Array<{
|
||||
message?: {
|
||||
content?: string;
|
||||
};
|
||||
}>;
|
||||
choices?: Array<{ message?: { content?: string } }>;
|
||||
};
|
||||
|
||||
return (
|
||||
|
|
@ -70,18 +77,170 @@ async function generateOpenAIReply(
|
|||
);
|
||||
}
|
||||
|
||||
export class ElatoOpenAiVoiceAgent extends VoiceAgent<Env> {
|
||||
transcriber = new WorkersAIFluxSTT(this.env.AI);
|
||||
async function synthesizeSpeech(env: Env, text: string): Promise<Response> {
|
||||
return env.AI.run(
|
||||
"@cf/deepgram/aura-1",
|
||||
{
|
||||
text,
|
||||
speaker: "asteria",
|
||||
encoding: "linear16",
|
||||
container: "none",
|
||||
sample_rate: AUDIO_OUTPUT_SAMPLE_RATE,
|
||||
},
|
||||
{
|
||||
returnRawResponse: true,
|
||||
},
|
||||
) as Promise<Response>;
|
||||
}
|
||||
|
||||
tts = new WorkersAITTS(this.env.AI, {
|
||||
model: "@cf/deepgram/aura-1",
|
||||
speaker: "asteria",
|
||||
});
|
||||
export class ElatoOpenAiVoiceAgent {
|
||||
private audioBuffer = new Uint8Array(0);
|
||||
private isGenerating = false;
|
||||
private readonly ctx: DurableObjectState;
|
||||
private readonly env: Env;
|
||||
|
||||
async onTurn(
|
||||
transcript: string,
|
||||
context: VoiceTurnContext,
|
||||
): Promise<string> {
|
||||
return generateOpenAIReply(this.env, transcript, context);
|
||||
constructor(ctx: DurableObjectState, env: Env) {
|
||||
this.ctx = ctx;
|
||||
this.env = env;
|
||||
}
|
||||
|
||||
private appendAudio(chunk: Uint8Array) {
|
||||
const next = new Uint8Array(this.audioBuffer.length + chunk.length);
|
||||
next.set(this.audioBuffer, 0);
|
||||
next.set(chunk, this.audioBuffer.length);
|
||||
this.audioBuffer = next;
|
||||
}
|
||||
|
||||
private async loadSessionState(): Promise<SessionState> {
|
||||
const stored = await this.ctx.storage.get<SessionState>("session_state");
|
||||
return stored || { history: [] };
|
||||
}
|
||||
|
||||
private async saveSessionState(state: SessionState) {
|
||||
await this.ctx.storage.put("session_state", state);
|
||||
}
|
||||
|
||||
private resetBufferedAudio() {
|
||||
this.audioBuffer = new Uint8Array(0);
|
||||
}
|
||||
|
||||
private async handleTurn(
|
||||
websocket: WebSocket,
|
||||
opus: Awaited<ReturnType<typeof createOpusPacketizer>>,
|
||||
) {
|
||||
const pcm = this.audioBuffer;
|
||||
this.resetBufferedAudio();
|
||||
|
||||
if (pcm.byteLength === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
websocket.send(createServerMessage("AUDIO.COMMITTED"));
|
||||
|
||||
const transcript = await transcribePcm(this.env, pcm);
|
||||
if (!transcript) {
|
||||
websocket.send(createServerMessage("RESPONSE.ERROR"));
|
||||
return;
|
||||
}
|
||||
/* Add user transcript DB call here */
|
||||
|
||||
const session = await this.loadSessionState();
|
||||
const reply = await generateOpenAIReply(this.env, transcript, session.history);
|
||||
session.history.push(
|
||||
{ role: "user", content: transcript },
|
||||
{ role: "assistant", content: reply },
|
||||
);
|
||||
await this.saveSessionState(session);
|
||||
/* Add AI transcript DB call here */
|
||||
|
||||
opus.reset();
|
||||
websocket.send(createServerMessage("RESPONSE.CREATED"));
|
||||
|
||||
const ttsResponse = await synthesizeSpeech(this.env, reply);
|
||||
if (!ttsResponse.ok || !ttsResponse.body) {
|
||||
websocket.send(createServerMessage("RESPONSE.ERROR"));
|
||||
return;
|
||||
}
|
||||
|
||||
const reader = ttsResponse.body.getReader();
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
if (value) {
|
||||
opus.push(value);
|
||||
}
|
||||
}
|
||||
opus.flush(true);
|
||||
websocket.send(createServerMessage("RESPONSE.COMPLETE", { volume_control: 100 }));
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
}
|
||||
|
||||
async fetch(request: Request): Promise<Response> {
|
||||
if (request.headers.get("Upgrade") !== "websocket") {
|
||||
return new Response("Expected websocket", { status: 426 });
|
||||
}
|
||||
|
||||
const pair = new WebSocketPair();
|
||||
const [client, server] = Object.values(pair);
|
||||
server.accept();
|
||||
const opus = await createOpusPacketizer((packet) => server.send(packet));
|
||||
|
||||
server.send(JSON.stringify(createAuthMessage()));
|
||||
|
||||
server.addEventListener("message", (event) => {
|
||||
void this.ctx.blockConcurrencyWhile(async () => {
|
||||
if (typeof event.data !== "string") {
|
||||
this.appendAudio(new Uint8Array(event.data as ArrayBuffer));
|
||||
return;
|
||||
}
|
||||
|
||||
const message = JSON.parse(event.data) as {
|
||||
type?: string;
|
||||
msg?: string;
|
||||
};
|
||||
|
||||
if (message.type !== "instruction") {
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.msg === "end_of_speech") {
|
||||
if (this.isGenerating) {
|
||||
return;
|
||||
}
|
||||
this.isGenerating = true;
|
||||
try {
|
||||
await this.handleTurn(server, opus);
|
||||
} catch {
|
||||
server.send(createServerMessage("RESPONSE.ERROR"));
|
||||
} finally {
|
||||
this.isGenerating = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.msg === "INTERRUPT") {
|
||||
this.isGenerating = false;
|
||||
this.resetBufferedAudio();
|
||||
server.send(createServerMessage("RESPONSE.COMPLETE", { volume_control: 100 }));
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.msg === "END_SESSION") {
|
||||
server.send(createServerMessage("SESSION.END"));
|
||||
server.close(1000, "Session ended");
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
server.addEventListener("close", () => {
|
||||
this.isGenerating = false;
|
||||
this.resetBufferedAudio();
|
||||
opus.close();
|
||||
});
|
||||
|
||||
return new Response(null, { status: 101, webSocket: client });
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,11 @@
|
|||
import { routeAgentRequest } from "agents";
|
||||
|
||||
import { requireAuthorizedRequest } from "./auth";
|
||||
import type { Env } from "./types";
|
||||
|
||||
export { ElatoOpenAiVoiceAgent } from "../models/openai";
|
||||
|
||||
function rewriteOpenAIRequest(request: Request): Request {
|
||||
const originalUrl = new URL(request.url);
|
||||
const pathParts = originalUrl.pathname.split("/").filter(Boolean);
|
||||
const sessionName = pathParts[2] || originalUrl.searchParams.get("session") || "default";
|
||||
|
||||
originalUrl.pathname = `/agents/elato-open-ai-voice-agent/${encodeURIComponent(sessionName)}`;
|
||||
return new Request(originalUrl.toString(), request);
|
||||
function sessionNameFromRequest(request: Request): string {
|
||||
const url = new URL(request.url);
|
||||
const pathParts = url.pathname.split("/").filter(Boolean);
|
||||
return pathParts[2] || url.searchParams.get("session") || "default";
|
||||
}
|
||||
|
||||
export default {
|
||||
|
|
@ -22,19 +16,15 @@ export default {
|
|||
return Response.json({ ok: true, backend: "cloudflare-voice" });
|
||||
}
|
||||
|
||||
let routedRequest = request;
|
||||
if (url.pathname === "/ws/openai" || url.pathname.startsWith("/ws/openai/")) {
|
||||
routedRequest = rewriteOpenAIRequest(request);
|
||||
if (url.pathname === "/ws/esp32" || url.pathname.startsWith("/ws/esp32/")) {
|
||||
/* Add AUTH here */
|
||||
|
||||
const stub = env.ElatoOpenAiVoiceAgent.get(
|
||||
env.ElatoOpenAiVoiceAgent.idFromName(sessionNameFromRequest(request)),
|
||||
);
|
||||
return stub.fetch(request);
|
||||
}
|
||||
|
||||
return (
|
||||
(await routeAgentRequest(routedRequest, env, {
|
||||
cors: true,
|
||||
onBeforeConnect: async (incomingRequest) =>
|
||||
requireAuthorizedRequest(incomingRequest, env),
|
||||
onBeforeRequest: async (incomingRequest) =>
|
||||
requireAuthorizedRequest(incomingRequest, env),
|
||||
})) ?? new Response("Not found", { status: 404 })
|
||||
);
|
||||
return new Response("Not found", { status: 404 });
|
||||
},
|
||||
};
|
||||
|
|
|
|||
4
server-cloudflare/src/modules.d.ts
vendored
Normal file
4
server-cloudflare/src/modules.d.ts
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
declare module "*.wasm" {
|
||||
const module: WebAssembly.Module;
|
||||
export default module;
|
||||
}
|
||||
212
server-cloudflare/src/opus.ts
Normal file
212
server-cloudflare/src/opus.ts
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
import opusWasm from "./vendor/opus.wasm";
|
||||
|
||||
const SAMPLE_RATE = 24_000;
|
||||
const CHANNELS = 1;
|
||||
const FRAME_DURATION_MS = 120;
|
||||
const BYTES_PER_SAMPLE = 2;
|
||||
const FRAME_SIZE_BYTES =
|
||||
(SAMPLE_RATE * FRAME_DURATION_MS / 1000) * CHANNELS * BYTES_PER_SAMPLE;
|
||||
const MAX_PACKET_SIZE = 2 ** 13;
|
||||
const PCM_BUFFER_SIZE = 2 ** 15;
|
||||
|
||||
interface OpusExports {
|
||||
memory: WebAssembly.Memory;
|
||||
malloc(size: number): number;
|
||||
free(ptr: number): void;
|
||||
opus_strerror(code: number): number;
|
||||
opus_encoder_get_size(channels: number): number;
|
||||
opus_encoder_init(
|
||||
ptr: number,
|
||||
sampleRate: number,
|
||||
channels: number,
|
||||
application: number,
|
||||
): number;
|
||||
opus_encoder_ctl_get(ptr: number, cmd: number): number;
|
||||
opus_encoder_ctl_set(ptr: number, cmd: number, arg: number): number;
|
||||
opus_encode(
|
||||
ptr: number,
|
||||
pcmPtr: number,
|
||||
frameSize: number,
|
||||
packetPtr: number,
|
||||
maxPacketSize: number,
|
||||
): number;
|
||||
}
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
const wasmInstancePromise = WebAssembly.instantiate(opusWasm, {
|
||||
wasi_snapshot_preview1: {
|
||||
fd_seek() {},
|
||||
fd_write() {},
|
||||
fd_close() {},
|
||||
proc_exit() {},
|
||||
},
|
||||
env: {
|
||||
emscripten_notify_memory_growth() {},
|
||||
},
|
||||
});
|
||||
|
||||
function loadCString(memory: Uint8Array, ptr: number): string {
|
||||
let end = ptr;
|
||||
while (end < memory.length && memory[end] !== 0) {
|
||||
end += 1;
|
||||
}
|
||||
return decoder.decode(memory.subarray(ptr, end));
|
||||
}
|
||||
|
||||
function ensureOk(code: number, memory: Uint8Array, wasm: OpusExports): number {
|
||||
if (code >= 0) {
|
||||
return code;
|
||||
}
|
||||
throw new Error(`opus: ${loadCString(memory, wasm.opus_strerror(code))}`);
|
||||
}
|
||||
|
||||
class WasmOpusEncoder {
|
||||
private readonly wasm: OpusExports;
|
||||
private memory: Uint8Array;
|
||||
private readonly pcmPtr: number;
|
||||
private readonly packetPtr: number;
|
||||
private readonly encoderPtr: number;
|
||||
private closed = false;
|
||||
|
||||
constructor(wasm: OpusExports) {
|
||||
this.wasm = wasm;
|
||||
this.memory = new Uint8Array(wasm.memory.buffer);
|
||||
this.packetPtr = wasm.malloc(MAX_PACKET_SIZE);
|
||||
this.pcmPtr = wasm.malloc(PCM_BUFFER_SIZE);
|
||||
this.encoderPtr = wasm.malloc(wasm.opus_encoder_get_size(CHANNELS));
|
||||
|
||||
ensureOk(
|
||||
wasm.opus_encoder_init(this.encoderPtr, SAMPLE_RATE, CHANNELS, 2048),
|
||||
this.memory,
|
||||
wasm,
|
||||
);
|
||||
ensureOk(wasm.opus_encoder_ctl_set(this.encoderPtr, 4002, 24_000), this.memory, wasm);
|
||||
ensureOk(wasm.opus_encoder_ctl_set(this.encoderPtr, 4040, FRAME_DURATION_MS), this.memory, wasm);
|
||||
}
|
||||
|
||||
private refreshMemory() {
|
||||
if (this.memory.buffer !== this.wasm.memory.buffer) {
|
||||
this.memory = new Uint8Array(this.wasm.memory.buffer);
|
||||
}
|
||||
}
|
||||
|
||||
encode(frame: Uint8Array): Uint8Array {
|
||||
if (this.closed) {
|
||||
throw new Error("Opus encoder is closed");
|
||||
}
|
||||
|
||||
this.refreshMemory();
|
||||
this.memory.set(frame, this.pcmPtr);
|
||||
const size = ensureOk(
|
||||
this.wasm.opus_encode(
|
||||
this.encoderPtr,
|
||||
this.pcmPtr,
|
||||
frame.byteLength / BYTES_PER_SAMPLE / CHANNELS,
|
||||
this.packetPtr,
|
||||
MAX_PACKET_SIZE,
|
||||
),
|
||||
this.memory,
|
||||
this.wasm,
|
||||
);
|
||||
|
||||
this.refreshMemory();
|
||||
return this.memory.slice(this.packetPtr, this.packetPtr + size);
|
||||
}
|
||||
|
||||
close() {
|
||||
if (this.closed) {
|
||||
return;
|
||||
}
|
||||
this.closed = true;
|
||||
this.wasm.free(this.encoderPtr);
|
||||
this.wasm.free(this.pcmPtr);
|
||||
this.wasm.free(this.packetPtr);
|
||||
}
|
||||
}
|
||||
|
||||
let encoderFactoryPromise: Promise<() => WasmOpusEncoder> | undefined;
|
||||
|
||||
async function getEncoderFactory(): Promise<() => WasmOpusEncoder> {
|
||||
if (!encoderFactoryPromise) {
|
||||
encoderFactoryPromise = wasmInstancePromise.then((instance) => {
|
||||
const wasm = instance.exports as unknown as OpusExports;
|
||||
return () => new WasmOpusEncoder(wasm);
|
||||
});
|
||||
}
|
||||
|
||||
return encoderFactoryPromise;
|
||||
}
|
||||
|
||||
function concatBytes(left: Uint8Array, right: Uint8Array): Uint8Array {
|
||||
const out = new Uint8Array(left.length + right.length);
|
||||
out.set(left, 0);
|
||||
out.set(right, left.length);
|
||||
return out;
|
||||
}
|
||||
|
||||
export async function createOpusPacketizer(
|
||||
sendPacket: (packet: Uint8Array) => void,
|
||||
) {
|
||||
const createEncoder = await getEncoderFactory();
|
||||
const encoder = createEncoder();
|
||||
let pending: Uint8Array<ArrayBufferLike> = new Uint8Array(0);
|
||||
let closed = false;
|
||||
|
||||
const push = (pcm: Uint8Array) => {
|
||||
if (closed || pcm.byteLength === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
pending = concatBytes(pending, pcm);
|
||||
while (pending.byteLength >= FRAME_SIZE_BYTES) {
|
||||
const frame = pending.slice(0, FRAME_SIZE_BYTES);
|
||||
pending = pending.slice(FRAME_SIZE_BYTES);
|
||||
sendPacket(encoder.encode(frame));
|
||||
}
|
||||
};
|
||||
|
||||
const flush = (padFinalFrame = false) => {
|
||||
if (closed || pending.byteLength === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!padFinalFrame) {
|
||||
pending = new Uint8Array(0);
|
||||
return;
|
||||
}
|
||||
|
||||
const padded = new Uint8Array(FRAME_SIZE_BYTES);
|
||||
padded.set(pending, 0);
|
||||
pending = new Uint8Array(0);
|
||||
sendPacket(encoder.encode(padded));
|
||||
};
|
||||
|
||||
const reset = () => {
|
||||
pending = new Uint8Array(0);
|
||||
};
|
||||
|
||||
const close = () => {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
closed = true;
|
||||
pending = new Uint8Array(0);
|
||||
encoder.close();
|
||||
};
|
||||
|
||||
const bufferedBytes = () => pending.byteLength;
|
||||
|
||||
return {
|
||||
push,
|
||||
flush,
|
||||
reset,
|
||||
close,
|
||||
bufferedBytes,
|
||||
};
|
||||
}
|
||||
|
||||
export {
|
||||
SAMPLE_RATE as OPUS_SAMPLE_RATE,
|
||||
FRAME_SIZE_BYTES as OPUS_FRAME_SIZE_BYTES,
|
||||
};
|
||||
BIN
server-cloudflare/src/vendor/opus.wasm
vendored
Executable file
BIN
server-cloudflare/src/vendor/opus.wasm
vendored
Executable file
Binary file not shown.
|
|
@ -9,5 +9,5 @@
|
|||
"skipLibCheck": true,
|
||||
"noEmit": true
|
||||
},
|
||||
"include": ["src/**/*.ts", "models/**/*.ts"]
|
||||
"include": ["src/**/*.ts", "src/**/*.d.ts", "models/**/*.ts"]
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue