'use client';

import {
  createContext,
  useContext,
  useState,
  useRef,
  useCallback,
  useEffect,
  type ReactNode,
} from 'react';
import type { WizardFormData } from './WizardContainer';

// ─── Types ───────────────────────────────────────────────────────────────────

export interface TranscriptEntry {
  role: 'user' | 'agent';
  text: string;
  timestamp: number;
}

type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';

export interface PendingContact {
  name: string;
  email: string;
}

interface VoiceAgentContextValue {
  status: ConnectionStatus;
  errorMessage: string | null;
  isMicActive: boolean;
  toggleMic: () => void;
  transcript: TranscriptEntry[];
  selections: Partial<WizardFormData>;
  isAnalyzingSite: boolean;
  isGeneratingBrief: boolean;
  userAmplitude: number;
  agentAmplitude: number;
  startConversation: () => Promise<void>;
  endConversation: () => void;
  completedBrief: string | null;
  completedFormData: WizardFormData | null;
  pendingContact: PendingContact | null;
  confirmContact: () => void;
  updatePendingContact: (field: 'name' | 'email', value: string) => void;
  canReconnect: boolean;
  reconnect: () => Promise<void>;
}

// ─── Context ─────────────────────────────────────────────────────────────────

const VoiceAgentContext = createContext<VoiceAgentContextValue | null>(null);

export function useVoiceAgent() {
  const ctx = useContext(VoiceAgentContext);
  if (!ctx) throw new Error('useVoiceAgent must be used within VoiceAgentProvider');
  return ctx;
}

// ─── Audio Helpers ───────────────────────────────────────────────────────────

function int16ToFloat32(int16: Int16Array): Float32Array {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}

function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}

function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}
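// The Live API exchanges raw 16-bit PCM as base64 strings: mic input is sent
// as 16 kHz PCM (produced by the worklet below) and agent audio arrives as
// 24 kHz PCM that playAudioChunk decodes via base64ToInt16 → int16ToFloat32.
// Illustrative round trip (sample values are assumptions for the example):
//
//   const pcm = new Int16Array([0, 16384, -32768]);
//   const b64 = arrayBufferToBase64(pcm.buffer);       // wire format
//   const floats = int16ToFloat32(base64ToInt16(b64)); // ≈ [0, 0.5, -1]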
// ─── Audio Worklet Processor Code ────────────────────────────────────────────

const WORKLET_CODE = `
class AudioRecordingWorklet extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;

  process(inputs) {
    if (inputs[0].length) {
      const channel0 = inputs[0][0];
      for (let i = 0; i < channel0.length; i++) {
        const sample = Math.max(-1, Math.min(1, channel0[i]));
        this.buffer[this.bufferWriteIndex++] = sample * 32767;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            event: 'chunk',
            data: { int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer },
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}
registerProcessor('audio-recorder-worklet', AudioRecordingWorklet);
`;

// ─── Default Form Data (mirror WizardContainer) ──────────────────────────────

const DEFAULT_FORM_DATA: WizardFormData = {
  services: [],
  aiEnabled: false,
  aiTypes: [],
  industry: null,
  scope: '',
  timeline: null,
  name: '',
  company: '',
  email: '',
  phone: '',
  contactPreference: 'email',
  currentSiteUrl: '',
  currentSiteThoughts: '',
};

// ─── Provider Component ──────────────────────────────────────────────────────

interface VoiceAgentProviderProps {
  locale: string;
  children: ReactNode;
}

export default function VoiceAgentProvider({ locale, children }: VoiceAgentProviderProps) {
  const [status, setStatus] = useState<ConnectionStatus>('idle');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isMicActive, setIsMicActive] = useState(true);
  const [transcript, setTranscript] = useState<TranscriptEntry[]>([]);
  const [selections, setSelections] = useState<Partial<WizardFormData>>({});
  const [isAnalyzingSite, setIsAnalyzingSite] = useState(false);
  const [isGeneratingBrief, setIsGeneratingBrief] = useState(false);
  const [userAmplitude, setUserAmplitude] = useState(0);
  const [agentAmplitude, setAgentAmplitude] = useState(0);
  const [completedBrief, setCompletedBrief] = useState<string | null>(null);
  const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);
  const [pendingContact, setPendingContact] = useState<PendingContact | null>(null);
  const [canReconnect, setCanReconnect] = useState(false);

  const turnCompleteRef = useRef(true);
  const briefSubmittedRef = useRef(false);
  const pendingContactRef = useRef<PendingContact | null>(null);
  const reconnectTranscriptRef = useRef<TranscriptEntry[]>([]);
  const statusRef = useRef<ConnectionStatus>('idle');
  const wsRef = useRef<WebSocket | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playbackContextRef = useRef<AudioContext | null>(null);
  const nextStartTimeRef = useRef(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animFrameRef = useRef(0);

  // Keep statusRef in sync for use in closures
  useEffect(() => {
    statusRef.current = status;
  }, [status]);

  const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
    setTranscript((prev) => {
      const last = prev[prev.length - 1];
      // Append to last entry if same role and turn is still ongoing
      if (last && last.role === role && !turnCompleteRef.current) {
        return [...prev.slice(0, -1), { ...last, text: last.text + text }];
      }
      turnCompleteRef.current = false;
      return [...prev, { role, text, timestamp: Date.now() }];
    });
  }, []);

  const trackAmplitude = useCallback(() => {
    if (!analyserRef.current) return;
    const data = new Uint8Array(analyserRef.current.fftSize);
    analyserRef.current.getByteTimeDomainData(data);
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
    setUserAmplitude(Math.sqrt(sum / data.length));
    animFrameRef.current = requestAnimationFrame(trackAmplitude);
  }, []);
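  // Tool handling: Gemini sends `toolCall.functionCalls`; every call must be
  // answered with a matching { id, name, response } entry in a `toolResponse`
  // message (see ws.onmessage below), or the model stalls waiting on it.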
  const handleToolCall = useCallback(
    async (name: string, args: Record<string, unknown>, callId: string) => {
      if (name === 'update_selections') {
        setSelections((prev) => ({ ...prev, ...(args as Partial<WizardFormData>) }));
        return JSON.stringify({ success: true });
      }

      if (name === 'analyze_website') {
        setIsAnalyzingSite(true);
        try {
          const res = await fetch('/api/analyze-site', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: args.url }),
          });
          const data = await res.json();
          setIsAnalyzingSite(false);
          return JSON.stringify(data);
        } catch {
          setIsAnalyzingSite(false);
          return JSON.stringify({ success: false, summary: "I wasn't able to analyze that site." });
        }
      }

      if (name === 'request_contact') {
        const { name: contactName, email: contactEmail } = args as { name: string; email: string };
        const contact = { name: contactName, email: contactEmail };
        setPendingContact(contact);
        pendingContactRef.current = contact;
        // Respond immediately so Gemini doesn't time out waiting for a tool response.
        // The agent is told to wait — user confirmation comes as a text message via confirmContact().
        return JSON.stringify({
          success: true,
          message:
            'Contact card is now shown on screen. Wait for the user to review and confirm before calling complete_brief. Do not proceed until you hear confirmation.',
        });
      }

      if (name === 'complete_brief') {
        // Prevent duplicate submissions
        if (briefSubmittedRef.current) {
          return JSON.stringify({ success: true, message: 'Brief already submitted' });
        }
        briefSubmittedRef.current = true;
        setIsGeneratingBrief(true);
        console.log('[VoiceAgent] complete_brief called, generating...');
        try {
          const toolArgs = args as Partial<WizardFormData> & { conversationSummary?: string };
          const summary = toolArgs.conversationSummary ?? '';
          const existingScope = toolArgs.scope ?? '';
          const combinedScope = [existingScope, summary].filter(Boolean).join('\n\n');
          // Use confirmed contact details from the on-screen card if available
          const contactName = pendingContactRef.current?.name ?? toolArgs.name ?? '';
          const contactEmail = pendingContactRef.current?.email ?? toolArgs.email ?? '';
          const formData = {
            ...DEFAULT_FORM_DATA,
            ...toolArgs,
            name: contactName,
            email: contactEmail,
            scope: combinedScope,
            locale,
          };
          delete (formData as Record<string, unknown>).conversationSummary;
          const res = await fetch('/api/configure', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(formData),
          });
          const data = (await res.json()) as { success: boolean; brief?: string };
          console.log('[VoiceAgent] Brief API response:', data.success);
          if (data.success && data.brief) {
            setCompletedBrief(data.brief);
            setCompletedFormData(formData as WizardFormData);
            console.log('[VoiceAgent] completedBrief and completedFormData set');
          }
          return JSON.stringify({ success: true });
        } catch (err) {
          console.error('[VoiceAgent] Brief generation failed:', err);
          briefSubmittedRef.current = false;
          return JSON.stringify({ success: false, error: 'Brief generation failed' });
        }
      }

      return JSON.stringify({ error: `Unknown tool: ${name}` });
    },
    [locale],
  );

  const playAudioChunk = useCallback((base64Audio: string) => {
    if (!playbackContextRef.current) return;
    const ctx = playbackContextRef.current;
    const int16 = base64ToInt16(base64Audio);
    const float32 = int16ToFloat32(int16);
    const buffer = ctx.createBuffer(1, float32.length, 24000);
    buffer.copyToChannel(new Float32Array(float32), 0);
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    if (nextStartTimeRef.current < ctx.currentTime) {
      nextStartTimeRef.current = ctx.currentTime;
    }
    source.start(nextStartTimeRef.current);
    nextStartTimeRef.current += buffer.duration;
    const amplitude = Math.sqrt(float32.reduce((sum, v) => sum + v * v, 0) / float32.length);
    setAgentAmplitude(amplitude);
  }, []);
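  // playAudioChunk above keeps output gapless: each 24 kHz buffer is scheduled
  // at nextStartTimeRef and the cursor then advances by buffer.duration; if
  // playback has fallen behind, the cursor snaps forward to ctx.currentTime.
  // Worked example (values assumed for illustration): a 4800-sample chunk
  // lasts 4800 / 24000 = 0.2 s, so three chunks arriving in a burst play
  // back-to-back at t, t + 0.2, and t + 0.4 rather than overlapping.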
  const startConversation = useCallback(async () => {
    setStatus('connecting');
    setErrorMessage(null);
    setCanReconnect(false);
    // Only reset transcript/selections on fresh start (not reconnect)
    if (reconnectTranscriptRef.current.length === 0) {
      setTranscript([]);
      setSelections({});
      setPendingContact(null);
      pendingContactRef.current = null;
    }
    setCompletedBrief(null);
    setCompletedFormData(null);
    briefSubmittedRef.current = false;

    try {
      const tokenRes = await fetch('/api/gemini-token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ locale }),
      });
      const tokenData = await tokenRes.json();
      if (!tokenData.success) {
        throw new Error(`Token generation failed: ${tokenData.error ?? tokenRes.status}`);
      }
      const { apiKey, model, config } = tokenData;

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, echoCancellation: true, noiseSuppression: true },
      });
      mediaStreamRef.current = stream;

      // Create AudioContexts during user gesture (required on mobile)
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;
      // Playback context MUST be created here (user gesture) for mobile
      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
      nextStartTimeRef.current = playbackContextRef.current.currentTime;

      const source = audioCtx.createMediaStreamSource(stream);
      const analyser = audioCtx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      analyserRef.current = analyser;

      // Register AudioWorklet
      const workletBlob = new Blob([WORKLET_CODE], { type: 'application/javascript' });
      const workletUrl = URL.createObjectURL(workletBlob);
      await audioCtx.audioWorklet.addModule(workletUrl);
      URL.revokeObjectURL(workletUrl);
      const workletNode = new AudioWorkletNode(audioCtx, 'audio-recorder-worklet');
      source.connect(workletNode);
      workletNode.connect(audioCtx.destination);

      // Open WebSocket to Gemini Live API
      const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
      console.log('[VoiceAgent] Connecting to WebSocket...');
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;

      // Timeout if setup doesn't complete within 10 seconds
      const setupTimeout = setTimeout(() => {
        if (ws.readyState !== WebSocket.CLOSED) {
          console.error('[VoiceAgent] Setup timed out after 10s');
          ws.close();
          setStatus('error');
          setErrorMessage('Connection timed out. Please try again.');
        }
      }, 10_000);

      ws.onopen = () => {
        console.log('[VoiceAgent] WebSocket opened, sending setup...');
        ws.send(JSON.stringify({
          setup: {
            model: `models/${model}`,
            generationConfig: {
              responseModalities: config.responseModalities,
              speechConfig: config.speechConfig,
            },
            systemInstruction: {
              parts: [{ text: config.systemInstruction }],
            },
            tools: config.tools,
          },
        }));
      };

      // Send audio chunks from worklet
      workletNode.port.onmessage = (event) => {
        if (event.data.event === 'chunk' && ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
          ws.send(JSON.stringify({
            realtimeInput: {
              audio: {
                data: base64,
                mimeType: 'audio/pcm;rate=16000',
              },
            },
          }));
        }
      };
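      // Incoming traffic: the server first acknowledges with `setupComplete`,
      // then streams `serverContent` (inline audio, text, transcriptions, and
      // turn markers) and `toolCall` messages, all handled below.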
      ws.onmessage = async (event) => {
        let raw: string;
        if (event.data instanceof Blob) {
          raw = await event.data.text();
        } else {
          raw = event.data as string;
        }
        const msg = JSON.parse(raw);
        console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200));

        // Setup complete — Gemini sends back a setupComplete message
        if (msg.setupComplete !== undefined) {
          console.log('[VoiceAgent] Setup complete, session active');
          clearTimeout(setupTimeout);
          setStatus('active');
          trackAmplitude();
          // If reconnecting, seed with prior conversation context
          const priorTranscript = reconnectTranscriptRef.current;
          if (priorTranscript.length > 0) {
            const summary = priorTranscript
              .map((e) => `${e.role === 'user' ? 'User' : 'Agent'}: ${e.text}`)
              .join('\n');
            ws.send(JSON.stringify({
              realtimeInput: {
                text: `We were having a conversation but got disconnected. Here is what was discussed so far:\n\n${summary}\n\nPlease acknowledge the reconnection briefly and continue where we left off.`,
              },
            }));
            reconnectTranscriptRef.current = [];
          } else {
            // Prompt the agent to introduce itself
            ws.send(JSON.stringify({
              realtimeInput: {
                text: 'Hello, please introduce yourself.',
              },
            }));
          }
          return;
        }

        // Server content (audio + text)
        if (msg.serverContent) {
          const parts = msg.serverContent.modelTurn?.parts;
          if (parts) {
            for (const part of parts) {
              if (part.inlineData) {
                console.log('[VoiceAgent] Audio chunk received, mime:', part.inlineData.mimeType, 'len:', part.inlineData.data?.length);
                playAudioChunk(part.inlineData.data);
              }
              if (part.text) {
                console.log('[VoiceAgent] Text:', part.text);
                addTranscript('agent', part.text);
              }
            }
          }
          // Input transcription
          if (msg.serverContent.inputTranscription?.text) {
            addTranscript('user', msg.serverContent.inputTranscription.text);
          }
          // Output transcription
          if (msg.serverContent.outputTranscription?.text) {
            addTranscript('agent', msg.serverContent.outputTranscription.text);
          }
          // Turn complete — next output starts a new transcript entry
          if (msg.serverContent.turnComplete || msg.serverContent.generationComplete) {
            turnCompleteRef.current = true;
          }
        }

        // Tool call
        if (msg.toolCall) {
          const calls = msg.toolCall.functionCalls;
          if (calls) {
            const responses = [];
            for (const call of calls) {
              const result = await handleToolCall(call.name, call.args ?? {}, call.id);
              responses.push({ id: call.id, name: call.name, response: { result } });
            }
            ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
          }
        }
      };

      ws.onerror = (e) => {
        console.error('[VoiceAgent] WebSocket error:', e);
        setStatus('error');
        setErrorMessage('Connection error. Please try again.');
      };

      ws.onclose = (e) => {
        console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
        // Clean up audio but preserve transcript and selections
        cancelAnimationFrame(animFrameRef.current);
        if (mediaStreamRef.current) {
          mediaStreamRef.current.getTracks().forEach((track) => track.stop());
          mediaStreamRef.current = null;
        }
        if (audioContextRef.current) {
          void audioContextRef.current.close();
          audioContextRef.current = null;
        }
        if (playbackContextRef.current) {
          void playbackContextRef.current.close();
          playbackContextRef.current = null;
        }
        wsRef.current = null;
        setUserAmplitude(0);
        setAgentAmplitude(0);
        // If we weren't intentionally ending, allow reconnect
        if (statusRef.current !== 'ending' && !briefSubmittedRef.current) {
          setStatus('error');
          setErrorMessage(null);
          setCanReconnect(true);
        }
      };
    } catch (error) {
      console.error('[VoiceAgent] Start failed:', error);
      setStatus('error');
      if (error instanceof DOMException && error.name === 'NotAllowedError') {
        setErrorMessage('Microphone access was denied.');
      } else {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        setErrorMessage(`Failed to start: ${msg}`);
      }
    }
  }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
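  // Note on the onclose guard above: a close while status is 'ending' (a
  // deliberate hang-up) or after the brief was submitted is treated as normal;
  // any other close tears down audio but keeps transcript and selections in
  // React state so reconnect() below can seed a new session with them.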
  const endConversation = useCallback(() => {
    setStatus('ending');
    cancelAnimationFrame(animFrameRef.current);
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (audioContextRef.current) {
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    if (playbackContextRef.current) {
      void playbackContextRef.current.close();
      playbackContextRef.current = null;
    }
    setUserAmplitude(0);
    setAgentAmplitude(0);
    setCanReconnect(false);
    reconnectTranscriptRef.current = [];
    setStatus('idle');
  }, []);

  const updatePendingContact = useCallback((field: 'name' | 'email', value: string) => {
    setPendingContact((prev) => {
      if (!prev) return null;
      const updated = { ...prev, [field]: value };
      pendingContactRef.current = updated;
      return updated;
    });
  }, []);

  const confirmContact = useCallback(() => {
    if (!pendingContactRef.current) return;
    // Send a text message to let the agent know the user confirmed their details
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      const { name, email } = pendingContactRef.current;
      wsRef.current.send(JSON.stringify({
        realtimeInput: {
          text: `The user has confirmed their contact details on screen. Name: ${name}, Email: ${email}. You may now call complete_brief.`,
        },
      }));
      console.log('[VoiceAgent] Contact confirmed, notified agent');
    } else {
      console.warn('[VoiceAgent] Cannot confirm contact — WebSocket not open');
    }
  }, []);

  const reconnect = useCallback(async () => {
    setCanReconnect(false);
    setErrorMessage(null);
    // Preserve transcript for the new session to pick up context
    reconnectTranscriptRef.current = transcript;
    await startConversation();
  }, [startConversation, transcript]);

  const toggleMic = useCallback(() => {
    if (!mediaStreamRef.current) return;
    const track = mediaStreamRef.current.getAudioTracks()[0];
    if (track) {
      track.enabled = !track.enabled;
      setIsMicActive(track.enabled);
    }
  }, []);

  const value: VoiceAgentContextValue = {
    status,
    errorMessage,
    isMicActive,
    toggleMic,
    transcript,
    selections,
    isAnalyzingSite,
    isGeneratingBrief,
    userAmplitude,
    agentAmplitude,
    startConversation,
    endConversation,
    completedBrief,
    completedFormData,
    pendingContact,
    confirmContact,
    updatePendingContact,
    canReconnect,
    reconnect,
  };

  return <VoiceAgentContext.Provider value={value}>{children}</VoiceAgentContext.Provider>;
}
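// ─── Usage sketch ────────────────────────────────────────────────────────────
// A minimal, illustrative consumer, assuming it renders somewhere under
// VoiceAgentProvider. The component name and markup are hypothetical; only
// the useVoiceAgent() surface comes from this module.
//
//   function VoiceCallButton() {
//     const { status, startConversation, endConversation, canReconnect, reconnect } =
//       useVoiceAgent();
//     if (status === 'active') return <button onClick={endConversation}>End call</button>;
//     if (canReconnect) return <button onClick={() => void reconnect()}>Reconnect</button>;
//     return <button onClick={() => void startConversation()}>Start call</button>;
//   }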