diff --git a/src/components/configurator/VoiceAgentProvider.tsx b/src/components/configurator/VoiceAgentProvider.tsx
index 0a51949..6c5360f 100644
--- a/src/components/configurator/VoiceAgentProvider.tsx
+++ b/src/components/configurator/VoiceAgentProvider.tsx
@@ -206,10 +206,7 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
   );
 
   const playAudioChunk = useCallback((base64Audio: string) => {
-    if (!playbackContextRef.current) {
-      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
-      nextStartTimeRef.current = playbackContextRef.current.currentTime;
-    }
+    if (!playbackContextRef.current) return;
     const ctx = playbackContextRef.current;
     const int16 = base64ToInt16(base64Audio);
     const float32 = int16ToFloat32(int16);
@@ -248,13 +245,17 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
     const { apiKey, model, config } = tokenData;
 
     const stream = await navigator.mediaDevices.getUserMedia({
-      audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true },
+      audio: { channelCount: 1, echoCancellation: true, noiseSuppression: true },
     });
     mediaStreamRef.current = stream;
 
-    // Create AudioContext for mic capture (must be in user gesture handler)
+    // Create AudioContexts during user gesture (required on mobile)
     const audioCtx = new AudioContext({ sampleRate: 16000 });
     audioContextRef.current = audioCtx;
+
+    // Playback context MUST be created here (user gesture) for mobile
+    playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
+    nextStartTimeRef.current = playbackContextRef.current.currentTime;
 
     const source = audioCtx.createMediaStreamSource(stream);
     const analyser = audioCtx.createAnalyser();
@@ -351,10 +352,12 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
             const parts = msg.serverContent.modelTurn?.parts;
             if (parts) {
               for (const part of parts) {
-                if (part.inlineData?.mimeType?.startsWith('audio/')) {
+                if (part.inlineData) {
+                  console.log('[VoiceAgent] Audio chunk received, mime:', part.inlineData.mimeType, 'len:', part.inlineData.data?.length);
                   playAudioChunk(part.inlineData.data);
                 }
                 if (part.text) {
+                  console.log('[VoiceAgent] Text:', part.text);
                   addTranscript('agent', part.text);
                 }
               }