fix: create playback AudioContext in user gesture for mobile support

- Playback AudioContext created during startConversation (button click)
- Removed sampleRate constraint from getUserMedia (let browser choose)
- Added audio chunk logging for debugging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 14:34:19 +01:00
parent 34a78e7d4a
commit 6fcebe74af

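Why the fix works: mobile browsers (iOS Safari in particular) only let an AudioContext start from inside a user-gesture handler. The old code lazily created the playback context inside playAudioChunk, which runs from a network message callback, so on mobile the context came up suspended and produced no sound. A minimal sketch of the pattern this commit applies (onConnectClick is an illustrative stand-in for the real startConversation handler, not a name from the file):

    async function onConnectClick(): Promise<void> {
      // Created synchronously inside the click handler, so mobile
      // browsers treat it as gesture-initiated and allow playback.
      const playbackCtx = new AudioContext({ sampleRate: 24000 });

      // Some browsers still start the context suspended; resume()
      // succeeds here because we are still within the user gesture.
      if (playbackCtx.state === 'suspended') {
        await playbackCtx.resume();
      }

      // ...store the context in a ref, open the realtime session, etc.
    }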

@@ -206,10 +206,7 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
   );
   const playAudioChunk = useCallback((base64Audio: string) => {
-    if (!playbackContextRef.current) {
-      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
-      nextStartTimeRef.current = playbackContextRef.current.currentTime;
-    }
+    if (!playbackContextRef.current) return;
     const ctx = playbackContextRef.current;
     const int16 = base64ToInt16(base64Audio);
     const float32 = int16ToFloat32(int16);
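The hunk above shows only the head of playAudioChunk; the scheduling tail falls outside the diff context. For readers following along, this is the typical shape of such a tail and what nextStartTimeRef is for (a sketch under that assumption, not the file's exact code):

    // Copy the decoded PCM into an AudioBuffer and queue it so that
    // consecutive chunks play back-to-back without gaps.
    function scheduleChunk(
      ctx: AudioContext,
      float32: Float32Array,
      nextStartTimeRef: { current: number },
    ): void {
      const buffer = ctx.createBuffer(1, float32.length, ctx.sampleRate);
      buffer.copyToChannel(float32, 0);
      const src = ctx.createBufferSource();
      src.buffer = buffer;
      src.connect(ctx.destination);
      // Start at the end of the previous chunk, or now if we fell behind.
      const startAt = Math.max(ctx.currentTime, nextStartTimeRef.current);
      src.start(startAt);
      nextStartTimeRef.current = startAt + buffer.duration;
    }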
@@ -248,13 +245,17 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
     const { apiKey, model, config } = tokenData;
     const stream = await navigator.mediaDevices.getUserMedia({
-      audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true },
+      audio: { channelCount: 1, echoCancellation: true, noiseSuppression: true },
     });
     mediaStreamRef.current = stream;
-    // Create AudioContext for mic capture (must be in user gesture handler)
+    // Create AudioContexts during user gesture (required on mobile)
     const audioCtx = new AudioContext({ sampleRate: 16000 });
     audioContextRef.current = audioCtx;
+    // Playback context MUST be created here (user gesture) for mobile
+    playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
+    nextStartTimeRef.current = playbackContextRef.current.currentTime;
     const source = audioCtx.createMediaStreamSource(stream);
     const analyser = audioCtx.createAnalyser();
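On the dropped sampleRate constraint: sample-rate constraints in getUserMedia are inconsistently honored across mobile browsers, so the commit lets the device capture at its native rate and relies on the 16 kHz capture AudioContext, which in current browsers resamples the stream when it is connected via createMediaStreamSource. To verify what rate the browser actually picked, the track settings expose it (a small sketch; stream is the getUserMedia result above):

    const track = stream.getAudioTracks()[0];
    // sampleRate may be absent from MediaTrackSettings in some browsers.
    console.log('[VoiceAgent] mic rate:', track.getSettings().sampleRate ?? 'unknown');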
@@ -351,10 +352,12 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
           const parts = msg.serverContent.modelTurn?.parts;
           if (parts) {
             for (const part of parts) {
-              if (part.inlineData?.mimeType?.startsWith('audio/')) {
+              if (part.inlineData) {
+                console.log('[VoiceAgent] Audio chunk received, mime:', part.inlineData.mimeType, 'len:', part.inlineData.data?.length);
                 playAudioChunk(part.inlineData.data);
               }
               if (part.text) {
+                console.log('[VoiceAgent] Text:', part.text);
                 addTranscript('agent', part.text);
               }
             }