'use client';

import { createContext, useContext, useState, useRef, useCallback, type ReactNode } from 'react';
import type { WizardFormData } from './WizardContainer';

// ─── Types ───────────────────────────────────────────────────────────────────

export interface TranscriptEntry {
  role: 'user' | 'agent';
  text: string;
  timestamp: number;
}

type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';

interface VoiceAgentContextValue {
  status: ConnectionStatus;
  errorMessage: string | null;
  isMicActive: boolean;
  toggleMic: () => void;
  transcript: TranscriptEntry[];
  selections: Partial<WizardFormData>;
  isAnalyzingSite: boolean;
  userAmplitude: number;
  agentAmplitude: number;
  startConversation: () => Promise<void>;
  endConversation: () => void;
  completedBrief: string | null;
  completedFormData: WizardFormData | null;
}

// ─── Context ─────────────────────────────────────────────────────────────────

const VoiceAgentContext = createContext<VoiceAgentContextValue | null>(null);

export function useVoiceAgent() {
  const ctx = useContext(VoiceAgentContext);
  if (!ctx) throw new Error('useVoiceAgent must be used within VoiceAgentProvider');
  return ctx;
}

// ─── Audio Helpers ───────────────────────────────────────────────────────────

// Convert 16-bit PCM samples to normalized floats in [-1, 1).
function int16ToFloat32(int16: Int16Array): Float32Array {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}

function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}

function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

// ─── Audio Worklet Processor Code ────────────────────────────────────────────

// Runs on the audio rendering thread. Clamps each sample, converts it to
// 16-bit PCM, and accumulates 2048-sample chunks (~128 ms at 16 kHz) before
// posting them to the main thread for transmission.
const WORKLET_CODE = `
class AudioRecordingWorklet extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;

  process(inputs) {
    if (inputs[0].length) {
      const channel0 = inputs[0][0];
      for (let i = 0; i < channel0.length; i++) {
        const sample = Math.max(-1, Math.min(1, channel0[i]));
        this.buffer[this.bufferWriteIndex++] = sample * 32767;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            event: 'chunk',
            data: { int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer },
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}
registerProcessor('audio-recorder-worklet', AudioRecordingWorklet);
`;

// ─── Default Form Data (mirrors WizardContainer) ─────────────────────────────

const DEFAULT_FORM_DATA: WizardFormData = {
  services: [],
  aiEnabled: false,
  aiTypes: [],
  industry: null,
  scope: '',
  timeline: null,
  name: '',
  company: '',
  email: '',
  phone: '',
  contactPreference: 'email',
  currentSiteUrl: '',
  currentSiteThoughts: '',
};
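// ─── Wire Protocol (orientation) ─────────────────────────────────────────────
// For orientation: a summary of the Gemini Live BidiGenerateContent WebSocket
// messages this file exchanges. The shapes are taken directly from the
// handlers in the provider below, in outline:
//
//   client → server: { setup: { model, generationConfig, systemInstruction, tools } }
//   client → server: { realtimeInput: { mediaChunks: [{ mimeType, data }] } }
//   client → server: { toolResponse: { functionResponses: [{ id, name, response }] } }
//   server → client: { setupComplete: { ... } }
//   server → client: { serverContent: { modelTurn, inputTranscription, outputTranscription } }
//   server → client: { toolCall: { functionCalls: [{ id, name, args }] } }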
// ─── Provider Component ──────────────────────────────────────────────────────

interface VoiceAgentProviderProps {
  locale: string;
  children: ReactNode;
}

export default function VoiceAgentProvider({ locale, children }: VoiceAgentProviderProps) {
  const [status, setStatus] = useState<ConnectionStatus>('idle');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isMicActive, setIsMicActive] = useState(true);
  const [transcript, setTranscript] = useState<TranscriptEntry[]>([]);
  const [selections, setSelections] = useState<Partial<WizardFormData>>({});
  const [isAnalyzingSite, setIsAnalyzingSite] = useState(false);
  const [userAmplitude, setUserAmplitude] = useState(0);
  const [agentAmplitude, setAgentAmplitude] = useState(0);
  const [completedBrief, setCompletedBrief] = useState<string | null>(null);
  const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);

  const wsRef = useRef<WebSocket | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playbackContextRef = useRef<AudioContext | null>(null);
  const nextStartTimeRef = useRef(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animFrameRef = useRef(0);

  const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
    setTranscript((prev) => [...prev, { role, text, timestamp: Date.now() }]);
  }, []);

  // Poll the mic analyser once per animation frame and derive an RMS amplitude
  // for the waveform visualization.
  const trackAmplitude = useCallback(() => {
    if (!analyserRef.current) return;
    const data = new Uint8Array(analyserRef.current.fftSize);
    analyserRef.current.getByteTimeDomainData(data);
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
    setUserAmplitude(Math.sqrt(sum / data.length));
    animFrameRef.current = requestAnimationFrame(trackAmplitude);
  }, []);

  // Dispatch a tool call from the model and return its result as a JSON string.
  const handleToolCall = useCallback(
    async (name: string, args: Record<string, unknown>, _callId: string) => {
      if (name === 'update_selections') {
        setSelections((prev) => ({ ...prev, ...(args as Partial<WizardFormData>) }));
        return JSON.stringify({ success: true });
      }
      if (name === 'analyze_website') {
        setIsAnalyzingSite(true);
        try {
          const res = await fetch('/api/analyze-site', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: args.url }),
          });
          const data = await res.json();
          setIsAnalyzingSite(false);
          return JSON.stringify(data);
        } catch {
          setIsAnalyzingSite(false);
          return JSON.stringify({ success: false, summary: "I wasn't able to analyze that site." });
        }
      }
      if (name === 'complete_brief') {
        try {
          const formData = { ...DEFAULT_FORM_DATA, ...(args as Partial<WizardFormData>), locale };
          const res = await fetch('/api/configure', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(formData),
          });
          const data = (await res.json()) as { success: boolean; brief?: string };
          if (data.success && data.brief) {
            setCompletedBrief(data.brief);
            setCompletedFormData(formData as WizardFormData);
          }
          return JSON.stringify(data);
        } catch {
          return JSON.stringify({ success: false, error: 'Brief generation failed' });
        }
      }
      return JSON.stringify({ error: `Unknown tool: ${name}` });
    },
    [locale],
  );
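  // Playback below uses a second AudioContext fixed at 24 kHz because the Live
  // API streams 16-bit PCM at that rate. Chunks are scheduled back to back via
  // nextStartTimeRef rather than played on arrival, so consecutive buffers line
  // up gaplessly even when network delivery is bursty.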
  const playAudioChunk = useCallback((base64Audio: string) => {
    if (!playbackContextRef.current) {
      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
      nextStartTimeRef.current = playbackContextRef.current.currentTime;
    }
    const ctx = playbackContextRef.current;
    const int16 = base64ToInt16(base64Audio);
    const float32 = int16ToFloat32(int16);
    const buffer = ctx.createBuffer(1, float32.length, 24000);
    buffer.copyToChannel(float32, 0);
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    // If playback has fallen behind real time (e.g. after a stall), resume from "now".
    if (nextStartTimeRef.current < ctx.currentTime) {
      nextStartTimeRef.current = ctx.currentTime;
    }
    source.start(nextStartTimeRef.current);
    nextStartTimeRef.current += buffer.duration;
    const amplitude = Math.sqrt(float32.reduce((sum, v) => sum + v * v, 0) / float32.length);
    setAgentAmplitude(amplitude);
  }, []);

  const startConversation = useCallback(async () => {
    setStatus('connecting');
    setErrorMessage(null);
    setTranscript([]);
    setSelections({});
    setCompletedBrief(null);
    setCompletedFormData(null);
    try {
      const tokenRes = await fetch('/api/gemini-token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ locale }),
      });
      const tokenData = await tokenRes.json();
      if (!tokenData.success) throw new Error(`Token generation failed: ${tokenData.error ?? tokenRes.status}`);
      const { apiKey, model, config } = tokenData;

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true },
      });
      mediaStreamRef.current = stream;

      // Create AudioContext for mic capture (must be created in a user gesture handler)
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;
      const source = audioCtx.createMediaStreamSource(stream);
      const analyser = audioCtx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      analyserRef.current = analyser;

      // Register the AudioWorklet from an inline blob so no extra asset needs serving
      const workletBlob = new Blob([WORKLET_CODE], { type: 'application/javascript' });
      const workletUrl = URL.createObjectURL(workletBlob);
      await audioCtx.audioWorklet.addModule(workletUrl);
      URL.revokeObjectURL(workletUrl);
      const workletNode = new AudioWorkletNode(audioCtx, 'audio-recorder-worklet');
      source.connect(workletNode);
      // The worklet writes nothing to its output, so this connection only keeps
      // the node pulled by the graph; it does not echo the mic to the speakers.
      workletNode.connect(audioCtx.destination);

      // Open WebSocket to Gemini Live API
      const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;

      ws.onopen = () => {
        // Send setup message with config
        ws.send(
          JSON.stringify({
            setup: {
              model: `models/${model}`,
              generationConfig: {
                responseModalities: config.responseModalities,
                speechConfig: config.speechConfig,
              },
              systemInstruction: {
                parts: [{ text: config.systemInstruction }],
              },
              tools: config.tools,
            },
          }),
        );
      };

      // Send audio chunks from the worklet
      workletNode.port.onmessage = (event) => {
        if (event.data.event === 'chunk' && ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
          ws.send(
            JSON.stringify({
              realtimeInput: {
                mediaChunks: [{ mimeType: 'audio/pcm;rate=16000', data: base64 }],
              },
            }),
          );
        }
      };

      ws.onmessage = async (event) => {
        const msg = JSON.parse(event.data as string);

        // Setup complete
        if (msg.setupComplete) {
          setStatus('active');
          trackAmplitude();
          return;
        }

        // Server content (audio + text)
        if (msg.serverContent) {
          const parts = msg.serverContent.modelTurn?.parts;
          if (parts) {
            for (const part of parts) {
              if (part.inlineData?.mimeType?.startsWith('audio/')) {
                playAudioChunk(part.inlineData.data);
              }
              if (part.text) {
                addTranscript('agent', part.text);
              }
            }
          }
          // Input transcription
          if (msg.serverContent.inputTranscription?.text) {
            addTranscript('user', msg.serverContent.inputTranscription.text);
          }
          // Output transcription
          if (msg.serverContent.outputTranscription?.text) {
            addTranscript('agent', msg.serverContent.outputTranscription.text);
          }
        }

        // Tool call
        if (msg.toolCall) {
          const calls = msg.toolCall.functionCalls;
          if (calls) {
            const responses = [];
            for (const call of calls) {
              const result = await handleToolCall(call.name, call.args ?? {}, call.id);
              responses.push({ id: call.id, name: call.name, response: { result } });
            }
            ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
          }
        }
      };

      ws.onerror = (e) => {
        console.error('[VoiceAgent] WebSocket error:', e);
        setStatus('error');
        setErrorMessage('Connection error. Please try again.');
      };

      ws.onclose = (e) => {
        console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
        // Use a functional update: reading `status` here would see a stale
        // value (still 'connecting' from when this handler was created).
        setStatus((prev) => (prev === 'active' ? 'idle' : prev));
      };
    } catch (error) {
      console.error('[VoiceAgent] Start failed:', error);
      setStatus('error');
      if (error instanceof DOMException && error.name === 'NotAllowedError') {
        setErrorMessage('Microphone access was denied.');
      } else {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        setErrorMessage(`Failed to start: ${msg}`);
      }
    }
  }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
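  // Teardown note: endConversation below is safe to call more than once (every
  // resource is null-checked before release), and the amplitude rAF loop is
  // cancelled first, so it can also serve as a useEffect cleanup.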
  const endConversation = useCallback(() => {
    setStatus('ending');
    cancelAnimationFrame(animFrameRef.current);
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (audioContextRef.current) {
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    if (playbackContextRef.current) {
      void playbackContextRef.current.close();
      playbackContextRef.current = null;
    }
    setUserAmplitude(0);
    setAgentAmplitude(0);
    setStatus('idle');
  }, []);

  const toggleMic = useCallback(() => {
    if (!mediaStreamRef.current) return;
    const track = mediaStreamRef.current.getAudioTracks()[0];
    if (track) {
      track.enabled = !track.enabled;
      setIsMicActive(track.enabled);
    }
  }, []);

  const value: VoiceAgentContextValue = {
    status,
    errorMessage,
    isMicActive,
    toggleMic,
    transcript,
    selections,
    isAnalyzingSite,
    userAmplitude,
    agentAmplitude,
    startConversation,
    endConversation,
    completedBrief,
    completedFormData,
  };

  return <VoiceAgentContext.Provider value={value}>{children}</VoiceAgentContext.Provider>;
}
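
// ─── Usage (illustrative) ────────────────────────────────────────────────────
// A minimal sketch of a consumer component; `VoiceCallButton` is hypothetical
// and not part of this module:
//
//   function VoiceCallButton() {
//     const { status, startConversation, endConversation } = useVoiceAgent();
//     if (status === 'active') {
//       return <button onClick={endConversation}>End call</button>;
//     }
//     return (
//       <button onClick={() => void startConversation()} disabled={status === 'connecting'}>
//         {status === 'connecting' ? 'Connecting…' : 'Start call'}
//       </button>
//     );
//   }
//
// The provider itself wraps the subtree that needs the hook, e.g.
//   <VoiceAgentProvider locale={locale}>…</VoiceAgentProvider>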