// LetsBeBiz-Site/src/components/configurator/VoiceAgentProvider.tsx

'use client';
import { createContext, useContext, useState, useRef, useCallback, type ReactNode } from 'react';
import type { WizardFormData } from './WizardContainer';
// ─── Types ───────────────────────────────────────────────────────────────────
export interface TranscriptEntry {
  role: 'user' | 'agent';
  text: string;
  timestamp: number;
}
type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';
interface VoiceAgentContextValue {
  status: ConnectionStatus;
  errorMessage: string | null;
  isMicActive: boolean;
  toggleMic: () => void;
  transcript: TranscriptEntry[];
  selections: Partial<WizardFormData>;
  isAnalyzingSite: boolean;
  userAmplitude: number;
  agentAmplitude: number;
  startConversation: () => Promise<void>;
  endConversation: () => void;
  completedBrief: string | null;
  completedFormData: WizardFormData | null;
}
// ─── Context ─────────────────────────────────────────────────────────────────
const VoiceAgentContext = createContext<VoiceAgentContextValue | null>(null);
export function useVoiceAgent() {
  const ctx = useContext(VoiceAgentContext);
  if (!ctx) throw new Error('useVoiceAgent must be used within VoiceAgentProvider');
  return ctx;
}
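// Example consumer (hypothetical component, for illustration only):
//
//   function TalkButton() {
//     const { status, startConversation, endConversation } = useVoiceAgent();
//     return status === 'active'
//       ? <button onClick={endConversation}>End</button>
//       : <button onClick={startConversation} disabled={status === 'connecting'}>Talk</button>;
//   }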
// ─── Audio Helpers ───────────────────────────────────────────────────────────
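// Gemini Live streams raw 16-bit PCM as base64 over the WebSocket, while Web
// Audio works in Float32 samples in [-1, 1]. These helpers convert between the two.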
function int16ToFloat32(int16: Int16Array): Float32Array {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}
// ─── Audio Worklet Processor Code ────────────────────────────────────────────
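// The recorder worklet is inlined as a string and loaded from a Blob URL so the
// component ships self-contained. It batches mic samples into 2048-sample Int16
// chunks (~128 ms at 16 kHz) before posting them back to the main thread.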
const WORKLET_CODE = `
class AudioRecordingWorklet extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;
  process(inputs) {
    if (inputs[0].length) {
      const channel0 = inputs[0][0];
      for (let i = 0; i < channel0.length; i++) {
        const sample = Math.max(-1, Math.min(1, channel0[i]));
        this.buffer[this.bufferWriteIndex++] = sample * 32767;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            event: 'chunk',
            data: { int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer },
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}
registerProcessor('audio-recorder-worklet', AudioRecordingWorklet);
`;
// ─── Default Form Data (mirror WizardContainer) ─────────────────────────────
const DEFAULT_FORM_DATA: WizardFormData = {
  services: [],
  aiEnabled: false,
  aiTypes: [],
  industry: null,
  scope: '',
  timeline: null,
  name: '',
  company: '',
  email: '',
  phone: '',
  contactPreference: 'email',
  currentSiteUrl: '',
  currentSiteThoughts: '',
};
// ─── Provider Component ──────────────────────────────────────────────────────
interface VoiceAgentProviderProps {
  locale: string;
  children: ReactNode;
}
export default function VoiceAgentProvider({ locale, children }: VoiceAgentProviderProps) {
  const [status, setStatus] = useState<ConnectionStatus>('idle');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isMicActive, setIsMicActive] = useState(true);
  const [transcript, setTranscript] = useState<TranscriptEntry[]>([]);
  const [selections, setSelections] = useState<Partial<WizardFormData>>({});
  const [isAnalyzingSite, setIsAnalyzingSite] = useState(false);
  const [userAmplitude, setUserAmplitude] = useState(0);
  const [agentAmplitude, setAgentAmplitude] = useState(0);
  const [completedBrief, setCompletedBrief] = useState<string | null>(null);
  const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);
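  // Audio/WebSocket plumbing lives in refs: it mutates on every frame or chunk
  // and must not trigger re-renders.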
  const wsRef = useRef<WebSocket | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playbackContextRef = useRef<AudioContext | null>(null);
  const nextStartTimeRef = useRef(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animFrameRef = useRef<number>(0);
  const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
    setTranscript((prev) => [...prev, { role, text, timestamp: Date.now() }]);
  }, []);
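  // Per-frame RMS of the mic signal (time-domain bytes centered at 128),
  // driving the user-side waveform animation via requestAnimationFrame.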
  const trackAmplitude = useCallback(() => {
    if (!analyserRef.current) return;
    const data = new Uint8Array(analyserRef.current.fftSize);
    analyserRef.current.getByteTimeDomainData(data);
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
    setUserAmplitude(Math.sqrt(sum / data.length));
    animFrameRef.current = requestAnimationFrame(trackAmplitude);
  }, []);
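  // Tool dispatch for the three function declarations the agent can call:
  // update_selections (merge partial form data), analyze_website (proxy to
  // /api/analyze-site), and complete_brief (submit to /api/configure).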
  const handleToolCall = useCallback(
    async (name: string, args: Record<string, unknown>, _callId: string) => {
      if (name === 'update_selections') {
        setSelections((prev) => ({ ...prev, ...(args as Partial<WizardFormData>) }));
        return JSON.stringify({ success: true });
      }
      if (name === 'analyze_website') {
        setIsAnalyzingSite(true);
        try {
          const res = await fetch('/api/analyze-site', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: args.url }),
          });
          const data = await res.json();
          setIsAnalyzingSite(false);
          return JSON.stringify(data);
        } catch {
          setIsAnalyzingSite(false);
          return JSON.stringify({ success: false, summary: "I wasn't able to analyze that site." });
        }
      }
      if (name === 'complete_brief') {
        try {
          const formData = { ...DEFAULT_FORM_DATA, ...(args as Partial<WizardFormData>), locale };
          const res = await fetch('/api/configure', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(formData),
          });
          const data = (await res.json()) as { success: boolean; brief?: string };
          if (data.success && data.brief) {
            setCompletedBrief(data.brief);
            setCompletedFormData(formData as WizardFormData);
          }
          return JSON.stringify(data);
        } catch {
          return JSON.stringify({ success: false, error: 'Brief generation failed' });
        }
      }
      return JSON.stringify({ error: `Unknown tool: ${name}` });
    },
    [locale],
  );
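  // Plays one 24 kHz PCM chunk. Chunks are scheduled back-to-back via
  // nextStartTimeRef so consecutive buffers play gaplessly even when they
  // arrive faster than real time.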
  const playAudioChunk = useCallback((base64Audio: string) => {
    if (!playbackContextRef.current) {
      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
      nextStartTimeRef.current = playbackContextRef.current.currentTime;
    }
    const ctx = playbackContextRef.current;
    const int16 = base64ToInt16(base64Audio);
    const float32 = int16ToFloat32(int16);
    const buffer = ctx.createBuffer(1, float32.length, 24000);
    buffer.copyToChannel(float32, 0);
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    if (nextStartTimeRef.current < ctx.currentTime) {
      nextStartTimeRef.current = ctx.currentTime;
    }
    source.start(nextStartTimeRef.current);
    nextStartTimeRef.current += buffer.duration;
    const amplitude = Math.sqrt(float32.reduce((sum, v) => sum + v * v, 0) / float32.length);
    setAgentAmplitude(amplitude);
  }, []);
  const startConversation = useCallback(async () => {
    setStatus('connecting');
    setErrorMessage(null);
    setTranscript([]);
    setSelections({});
    setCompletedBrief(null);
    setCompletedFormData(null);
    try {
      const tokenRes = await fetch('/api/gemini-token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ locale }),
      });
      const tokenData = await tokenRes.json();
      if (!tokenData.success) throw new Error(`Token generation failed: ${tokenData.error ?? tokenRes.status}`);
      const { apiKey, model, config } = tokenData;
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true },
      });
      mediaStreamRef.current = stream;
      // Create AudioContext for mic capture (must be in user gesture handler)
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;
      const source = audioCtx.createMediaStreamSource(stream);
      const analyser = audioCtx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      analyserRef.current = analyser;
      // Register AudioWorklet
      const workletBlob = new Blob([WORKLET_CODE], { type: 'application/javascript' });
      const workletUrl = URL.createObjectURL(workletBlob);
      await audioCtx.audioWorklet.addModule(workletUrl);
      URL.revokeObjectURL(workletUrl);
      const workletNode = new AudioWorkletNode(audioCtx, 'audio-recorder-worklet');
      source.connect(workletNode);
      workletNode.connect(audioCtx.destination);
      // Open WebSocket to Gemini Live API
      const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
      console.log('[VoiceAgent] Connecting to WebSocket...');
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;
      // Timeout if setup doesn't complete within 10 seconds
      const setupTimeout = setTimeout(() => {
        if (ws.readyState !== WebSocket.CLOSED) {
          console.error('[VoiceAgent] Setup timed out after 10s');
          ws.close();
          setStatus('error');
          setErrorMessage('Connection timed out. Please try again.');
        }
      }, 10_000);
      ws.onopen = () => {
        console.log('[VoiceAgent] WebSocket opened, sending setup...');
        // First message must be a "setup" payload (BidiGenerateContentSetup);
        // responseModalities and speechConfig belong under generationConfig.
        ws.send(JSON.stringify({
          setup: {
            model: `models/${model}`,
            generationConfig: {
              responseModalities: config.responseModalities,
              speechConfig: config.speechConfig,
            },
            systemInstruction: {
              parts: [{ text: config.systemInstruction }],
            },
            tools: config.tools,
          },
        }));
      };
      // Send audio chunks from worklet
      workletNode.port.onmessage = (event) => {
        if (event.data.event === 'chunk' && ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
          ws.send(JSON.stringify({
            realtimeInput: {
              audio: {
                data: base64,
                mimeType: 'audio/pcm;rate=16000',
              },
            },
          }));
        }
      };
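      // Incoming server messages, in the order a session typically sees them:
      // setupComplete (handshake done), serverContent (audio chunks, text, and
      // input/output transcriptions), and toolCall (function-call requests).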
      ws.onmessage = async (event) => {
        // Browsers may receive Live API frames as binary Blobs; normalize to text before parsing.
        const raw = event.data instanceof Blob ? await event.data.text() : (event.data as string);
        const msg = JSON.parse(raw);
        console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200));
        // Setup complete — Gemini sends back a setupComplete message
        if (msg.setupComplete !== undefined) {
          console.log('[VoiceAgent] Setup complete, session active');
          clearTimeout(setupTimeout);
          setStatus('active');
          trackAmplitude();
          return;
        }
        // Server content (audio + text)
        if (msg.serverContent) {
          const parts = msg.serverContent.modelTurn?.parts;
          if (parts) {
            for (const part of parts) {
              if (part.inlineData?.mimeType?.startsWith('audio/')) {
                playAudioChunk(part.inlineData.data);
              }
              if (part.text) {
                addTranscript('agent', part.text);
              }
            }
          }
          // Input transcription
          if (msg.serverContent.inputTranscription?.text) {
            addTranscript('user', msg.serverContent.inputTranscription.text);
          }
          // Output transcription
          if (msg.serverContent.outputTranscription?.text) {
            addTranscript('agent', msg.serverContent.outputTranscription.text);
          }
        }
        // Tool call
        if (msg.toolCall) {
          const calls = msg.toolCall.functionCalls;
          if (calls) {
            const responses = [];
            for (const call of calls) {
              const result = await handleToolCall(call.name, call.args ?? {}, call.id);
              responses.push({ id: call.id, name: call.name, response: { result } });
            }
            ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
          }
        }
      };
      ws.onerror = (e) => {
        console.error('[VoiceAgent] WebSocket error:', e);
        setStatus('error');
        setErrorMessage('Connection error. Please try again.');
      };
      ws.onclose = (e) => {
        console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
        // Use the functional updater: the `status` captured by this closure is
        // stale (still 'connecting' from when the handler was attached).
        setStatus((prev) => (prev === 'active' ? 'idle' : prev));
      };
    } catch (error) {
      console.error('[VoiceAgent] Start failed:', error);
      setStatus('error');
      if (error instanceof DOMException && error.name === 'NotAllowedError') {
        setErrorMessage('Microphone access was denied.');
      } else {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        setErrorMessage(`Failed to start: ${msg}`);
      }
    }
  }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
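  // Tears down the session: closes the socket, stops mic tracks, closes both
  // AudioContexts, and resets the waveform amplitudes.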
  const endConversation = useCallback(() => {
    setStatus('ending');
    cancelAnimationFrame(animFrameRef.current);
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (audioContextRef.current) {
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    if (playbackContextRef.current) {
      void playbackContextRef.current.close();
      playbackContextRef.current = null;
    }
    setUserAmplitude(0);
    setAgentAmplitude(0);
    setStatus('idle');
  }, []);
  const toggleMic = useCallback(() => {
    if (!mediaStreamRef.current) return;
    const track = mediaStreamRef.current.getAudioTracks()[0];
    if (track) {
      track.enabled = !track.enabled;
      setIsMicActive(track.enabled);
    }
  }, []);
  const value: VoiceAgentContextValue = {
    status,
    errorMessage,
    isMicActive,
    toggleMic,
    transcript,
    selections,
    isAnalyzingSite,
    userAmplitude,
    agentAmplitude,
    startConversation,
    endConversation,
    completedBrief,
    completedFormData,
  };
  return (
    <VoiceAgentContext.Provider value={value}>
      {children}
    </VoiceAgentContext.Provider>
  );
}
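
// Usage sketch (hypothetical parent; the WizardContainer pairing is assumed):
//
//   <VoiceAgentProvider locale={locale}>
//     <WizardContainer />
//   </VoiceAgentProvider>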