From 0e3c92f8731cc7f65d5972bf2dbb0dc1349ad24e Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 28 Mar 2026 14:15:42 +0100 Subject: [PATCH] fix: correct WebSocket setup and audio format for Gemini Live API - Setup message uses "config" key (not "setup") - Audio sent as realtimeInput.audio (not mediaChunks) - Added message logging for debugging Co-Authored-By: Claude Opus 4.6 (1M context) --- .../configurator/VoiceAgentProvider.tsx | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/components/configurator/VoiceAgentProvider.tsx b/src/components/configurator/VoiceAgentProvider.tsx index b1ecaab..0890fbd 100644 --- a/src/components/configurator/VoiceAgentProvider.tsx +++ b/src/components/configurator/VoiceAgentProvider.tsx @@ -278,14 +278,12 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi wsRef.current = ws; ws.onopen = () => { - // Send setup message with config + // Send setup message — must use "config" key per Gemini Live API spec ws.send(JSON.stringify({ - setup: { + config: { model: `models/${model}`, - generationConfig: { - responseModalities: config.responseModalities, - speechConfig: config.speechConfig, - }, + responseModalities: config.responseModalities, + speechConfig: config.speechConfig, systemInstruction: { parts: [{ text: config.systemInstruction }], }, @@ -300,7 +298,10 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer); ws.send(JSON.stringify({ realtimeInput: { - mediaChunks: [{ mimeType: 'audio/pcm;rate=16000', data: base64 }], + audio: { + data: base64, + mimeType: 'audio/pcm;rate=16000', + }, }, })); } @@ -308,9 +309,11 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi ws.onmessage = async (event) => { const msg = JSON.parse(event.data as string); + console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200)); - // Setup complete - if (msg.setupComplete) { + // Setup complete — Gemini sends back a setupComplete message + if (msg.setupComplete !== undefined) { + console.log('[VoiceAgent] Setup complete, session active'); setStatus('active'); trackAmplitude(); return;