fix: correct WebSocket setup and audio format for Gemini Live API
All checks were successful
Build & Push / build-and-push (push) Successful in 1m30s

- Setup message now uses the "config" key instead of "setup"
- Audio is now sent as realtimeInput.audio instead of realtimeInput.mediaChunks
- Added logging of incoming WebSocket messages for debugging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-28 14:15:42 +01:00
parent 15136080ed
commit 0e3c92f873

View File

@@ -278,14 +278,12 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
wsRef.current = ws; wsRef.current = ws;
ws.onopen = () => { ws.onopen = () => {
// Send setup message with config // Send setup message — must use "config" key per Gemini Live API spec
ws.send(JSON.stringify({ ws.send(JSON.stringify({
setup: { config: {
model: `models/${model}`, model: `models/${model}`,
generationConfig: {
responseModalities: config.responseModalities, responseModalities: config.responseModalities,
speechConfig: config.speechConfig, speechConfig: config.speechConfig,
},
systemInstruction: { systemInstruction: {
parts: [{ text: config.systemInstruction }], parts: [{ text: config.systemInstruction }],
}, },
@@ -300,7 +298,10 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer); const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
ws.send(JSON.stringify({ ws.send(JSON.stringify({
realtimeInput: { realtimeInput: {
mediaChunks: [{ mimeType: 'audio/pcm;rate=16000', data: base64 }], audio: {
data: base64,
mimeType: 'audio/pcm;rate=16000',
},
}, },
})); }));
} }
@@ -308,9 +309,11 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
ws.onmessage = async (event) => { ws.onmessage = async (event) => {
const msg = JSON.parse(event.data as string); const msg = JSON.parse(event.data as string);
console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200));
// Setup complete // Setup complete — Gemini sends back a setupComplete message
if (msg.setupComplete) { if (msg.setupComplete !== undefined) {
console.log('[VoiceAgent] Setup complete, session active');
setStatus('active'); setStatus('active');
trackAmplitude(); trackAmplitude();
return; return;