- Playback AudioContext created during startConversation (button click)
- Removed sampleRate constraint from getUserMedia (let browser choose)
- Added audio chunk logging for debugging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
470 lines
16 KiB
TypeScript
'use client';

import { createContext, useContext, useState, useRef, useCallback, type ReactNode } from 'react';
import type { WizardFormData } from './WizardContainer';

// ─── Types ───────────────────────────────────────────────────────────────────

export interface TranscriptEntry {
  role: 'user' | 'agent';
  text: string;
  timestamp: number;
}

type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';

interface VoiceAgentContextValue {
  status: ConnectionStatus;
  errorMessage: string | null;
  isMicActive: boolean;
  toggleMic: () => void;
  transcript: TranscriptEntry[];
  selections: Partial<WizardFormData>;
  isAnalyzingSite: boolean;
  userAmplitude: number;
  agentAmplitude: number;
  startConversation: () => Promise<void>;
  endConversation: () => void;
  completedBrief: string | null;
  completedFormData: WizardFormData | null;
}

// ─── Context ─────────────────────────────────────────────────────────────────

const VoiceAgentContext = createContext<VoiceAgentContextValue | null>(null);

export function useVoiceAgent() {
  const ctx = useContext(VoiceAgentContext);
  if (!ctx) throw new Error('useVoiceAgent must be used within VoiceAgentProvider');
  return ctx;
}
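
// Typical consumer usage (hypothetical sketch, for illustration only):
//   const { status, startConversation, endConversation, transcript } = useVoiceAgent();
//   if (status === 'idle') await startConversation();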

// ─── Audio Helpers ───────────────────────────────────────────────────────────
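// Gemini Live exchanges raw 16-bit PCM over the WebSocket: the worklet below
// uploads 16 kHz mono, and the server returns 24 kHz audio. These helpers convert
// between base64, Int16Array, and the Float32 samples the Web Audio API expects.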

function int16ToFloat32(int16: Int16Array): Float32Array {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}

function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}

function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

// ─── Audio Worklet Processor Code ────────────────────────────────────────────
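// Inlined as a string so it can be loaded from a Blob URL at runtime. The worklet
// runs off the main thread: it clamps each Float32 mic sample, converts it to
// 16-bit PCM, and posts a full 2048-sample chunk back through its MessagePort.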

const WORKLET_CODE = `
class AudioRecordingWorklet extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;

  process(inputs) {
    if (inputs[0].length) {
      const channel0 = inputs[0][0];
      for (let i = 0; i < channel0.length; i++) {
        const sample = Math.max(-1, Math.min(1, channel0[i]));
        this.buffer[this.bufferWriteIndex++] = sample * 32767;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            event: 'chunk',
            data: { int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer },
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}
registerProcessor('audio-recorder-worklet', AudioRecordingWorklet);
`;

// ─── Default Form Data (mirror WizardContainer) ─────────────────────────────

const DEFAULT_FORM_DATA: WizardFormData = {
  services: [],
  aiEnabled: false,
  aiTypes: [],
  industry: null,
  scope: '',
  timeline: null,
  name: '',
  company: '',
  email: '',
  phone: '',
  contactPreference: 'email',
  currentSiteUrl: '',
  currentSiteThoughts: '',
};

// ─── Provider Component ──────────────────────────────────────────────────────

interface VoiceAgentProviderProps {
  locale: string;
  children: ReactNode;
}

export default function VoiceAgentProvider({ locale, children }: VoiceAgentProviderProps) {
  const [status, setStatus] = useState<ConnectionStatus>('idle');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isMicActive, setIsMicActive] = useState(true);
  const [transcript, setTranscript] = useState<TranscriptEntry[]>([]);
  const [selections, setSelections] = useState<Partial<WizardFormData>>({});
  const [isAnalyzingSite, setIsAnalyzingSite] = useState(false);
  const [userAmplitude, setUserAmplitude] = useState(0);
  const [agentAmplitude, setAgentAmplitude] = useState(0);
  const [completedBrief, setCompletedBrief] = useState<string | null>(null);
  const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);

  const wsRef = useRef<WebSocket | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playbackContextRef = useRef<AudioContext | null>(null);
  const nextStartTimeRef = useRef(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animFrameRef = useRef<number>(0);

  const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
    setTranscript((prev) => [...prev, { role, text, timestamp: Date.now() }]);
  }, []);
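
  // Mic level meter: sample the analyser's time-domain data every animation frame
  // and publish its RMS so the UI can animate the user's waveform.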
  const trackAmplitude = useCallback(() => {
    if (!analyserRef.current) return;
    const data = new Uint8Array(analyserRef.current.fftSize);
    analyserRef.current.getByteTimeDomainData(data);
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
    setUserAmplitude(Math.sqrt(sum / data.length));
    animFrameRef.current = requestAnimationFrame(trackAmplitude);
  }, []);
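
  // Handle function calls issued by the model. Each branch returns a JSON string
  // that is sent back to Gemini as the tool response:
  // - update_selections: merge partial wizard answers into local state
  // - analyze_website:   proxy to /api/analyze-site and relay its summary
  // - complete_brief:    POST the assembled form data to /api/configure and store the brief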
  const handleToolCall = useCallback(
    async (name: string, args: Record<string, unknown>, callId: string) => {
      if (name === 'update_selections') {
        setSelections((prev) => ({ ...prev, ...(args as Partial<WizardFormData>) }));
        return JSON.stringify({ success: true });
      }

      if (name === 'analyze_website') {
        setIsAnalyzingSite(true);
        try {
          const res = await fetch('/api/analyze-site', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: args.url }),
          });
          const data = await res.json();
          setIsAnalyzingSite(false);
          return JSON.stringify(data);
        } catch {
          setIsAnalyzingSite(false);
          return JSON.stringify({ success: false, summary: "I wasn't able to analyze that site." });
        }
      }

      if (name === 'complete_brief') {
        try {
          const formData = { ...DEFAULT_FORM_DATA, ...(args as Partial<WizardFormData>), locale };
          const res = await fetch('/api/configure', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(formData),
          });
          const data = (await res.json()) as { success: boolean; brief?: string };
          if (data.success && data.brief) {
            setCompletedBrief(data.brief);
            setCompletedFormData(formData as WizardFormData);
          }
          return JSON.stringify(data);
        } catch {
          return JSON.stringify({ success: false, error: 'Brief generation failed' });
        }
      }

      return JSON.stringify({ error: `Unknown tool: ${name}` });
    },
    [locale],
  );
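
  // Decode a base64 PCM chunk and schedule it on the playback context.
  // nextStartTimeRef marks where the previous chunk ends, so consecutive chunks
  // play back-to-back; if playback has fallen behind, snap to ctx.currentTime.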
  const playAudioChunk = useCallback((base64Audio: string) => {
    if (!playbackContextRef.current) return;
    const ctx = playbackContextRef.current;
    const int16 = base64ToInt16(base64Audio);
    const float32 = int16ToFloat32(int16);
    const buffer = ctx.createBuffer(1, float32.length, 24000);
    buffer.copyToChannel(new Float32Array(float32), 0);
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    if (nextStartTimeRef.current < ctx.currentTime) {
      nextStartTimeRef.current = ctx.currentTime;
    }
    source.start(nextStartTimeRef.current);
    nextStartTimeRef.current += buffer.duration;

    const amplitude = Math.sqrt(float32.reduce((sum, v) => sum + v * v, 0) / float32.length);
    setAgentAmplitude(amplitude);
  }, []);
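
  // Connection flow, run from the user's button click:
  // 1. fetch the API key and session config from /api/gemini-token
  // 2. open the microphone and create both AudioContexts (the gesture matters on mobile)
  // 3. register the recorder worklet and wire mic -> analyser / worklet
  // 4. open the Gemini Live WebSocket, send setup, then stream PCM up and handle
  //    returned audio, transcripts, and tool calls as they arrive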
  const startConversation = useCallback(async () => {
    setStatus('connecting');
    setErrorMessage(null);
    setTranscript([]);
    setSelections({});
    setCompletedBrief(null);
    setCompletedFormData(null);

    try {
      const tokenRes = await fetch('/api/gemini-token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ locale }),
      });
      const tokenData = await tokenRes.json();
      if (!tokenData.success) throw new Error(`Token generation failed: ${tokenData.error ?? tokenRes.status}`);

      const { apiKey, model, config } = tokenData;

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, echoCancellation: true, noiseSuppression: true },
      });
      mediaStreamRef.current = stream;

      // Create AudioContexts during user gesture (required on mobile)
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;

      // Playback context MUST be created here (user gesture) for mobile
      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
      nextStartTimeRef.current = playbackContextRef.current.currentTime;
      const source = audioCtx.createMediaStreamSource(stream);

      const analyser = audioCtx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      analyserRef.current = analyser;

      // Register AudioWorklet
      const workletBlob = new Blob([WORKLET_CODE], { type: 'application/javascript' });
      const workletUrl = URL.createObjectURL(workletBlob);
      await audioCtx.audioWorklet.addModule(workletUrl);
      URL.revokeObjectURL(workletUrl);

      const workletNode = new AudioWorkletNode(audioCtx, 'audio-recorder-worklet');
      source.connect(workletNode);
      workletNode.connect(audioCtx.destination);

      // Open WebSocket to Gemini Live API
      const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
      console.log('[VoiceAgent] Connecting to WebSocket...');
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;

      // Timeout if setup doesn't complete within 10 seconds
      const setupTimeout = setTimeout(() => {
        if (ws.readyState !== WebSocket.CLOSED) {
          console.error('[VoiceAgent] Setup timed out after 10s');
          ws.close();
          setStatus('error');
          setErrorMessage('Connection timed out. Please try again.');
        }
      }, 10_000);
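
      // The first frame sent after the socket opens is the BidiGenerateContent
      // setup message; the session counts as live once the server replies with
      // setupComplete (handled in onmessage below).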
      ws.onopen = () => {
        console.log('[VoiceAgent] WebSocket opened, sending setup...');
        ws.send(JSON.stringify({
          setup: {
            model: `models/${model}`,
            generationConfig: {
              responseModalities: config.responseModalities,
              speechConfig: config.speechConfig,
            },
            systemInstruction: {
              parts: [{ text: config.systemInstruction }],
            },
            tools: config.tools,
          },
        }));
      };

      // Send audio chunks from worklet
      workletNode.port.onmessage = (event) => {
        if (event.data.event === 'chunk' && ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
          ws.send(JSON.stringify({
            realtimeInput: {
              audio: {
                data: base64,
                mimeType: 'audio/pcm;rate=16000',
              },
            },
          }));
        }
      };

      ws.onmessage = async (event) => {
        let raw: string;
        if (event.data instanceof Blob) {
          raw = await event.data.text();
        } else {
          raw = event.data as string;
        }
        const msg = JSON.parse(raw);
        console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200));

        // Setup complete — Gemini sends back a setupComplete message
        if (msg.setupComplete !== undefined) {
          console.log('[VoiceAgent] Setup complete, session active');
          clearTimeout(setupTimeout);
          setStatus('active');
          trackAmplitude();
          // Prompt the agent to introduce itself
          ws.send(JSON.stringify({
            clientContent: {
              turns: [{ role: 'user', parts: [{ text: 'Hello, please introduce yourself.' }] }],
              turnComplete: true,
            },
          }));
          return;
        }

        // Server content (audio + text)
        if (msg.serverContent) {
          const parts = msg.serverContent.modelTurn?.parts;
          if (parts) {
            for (const part of parts) {
              if (part.inlineData) {
                console.log('[VoiceAgent] Audio chunk received, mime:', part.inlineData.mimeType, 'len:', part.inlineData.data?.length);
                playAudioChunk(part.inlineData.data);
              }
              if (part.text) {
                console.log('[VoiceAgent] Text:', part.text);
                addTranscript('agent', part.text);
              }
            }
          }
          // Input transcription
          if (msg.serverContent.inputTranscription?.text) {
            addTranscript('user', msg.serverContent.inputTranscription.text);
          }
          // Output transcription
          if (msg.serverContent.outputTranscription?.text) {
            addTranscript('agent', msg.serverContent.outputTranscription.text);
          }
        }

        // Tool call
        if (msg.toolCall) {
          const calls = msg.toolCall.functionCalls;
          if (calls) {
            const responses = [];
            for (const call of calls) {
              const result = await handleToolCall(call.name, call.args ?? {}, call.id);
              responses.push({ id: call.id, name: call.name, response: { result } });
            }
            ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
          }
        }
      };

      ws.onerror = (e) => {
        console.error('[VoiceAgent] WebSocket error:', e);
        setStatus('error');
        setErrorMessage('Connection error. Please try again.');
      };

      ws.onclose = (e) => {
        console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
        // Functional update avoids reading a stale `status` from this closure
        setStatus((prev) => (prev === 'active' ? 'idle' : prev));
      };
    } catch (error) {
      console.error('[VoiceAgent] Start failed:', error);
      setStatus('error');
      if (error instanceof DOMException && error.name === 'NotAllowedError') {
        setErrorMessage('Microphone access was denied.');
      } else {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        setErrorMessage(`Failed to start: ${msg}`);
      }
    }
  }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
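
  // Tear everything down: stop the amplitude loop, close the WebSocket, release
  // the microphone tracks, and close both AudioContexts before returning to idle.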
  const endConversation = useCallback(() => {
    setStatus('ending');
    cancelAnimationFrame(animFrameRef.current);

    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (audioContextRef.current) {
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    if (playbackContextRef.current) {
      void playbackContextRef.current.close();
      playbackContextRef.current = null;
    }

    setUserAmplitude(0);
    setAgentAmplitude(0);
    setStatus('idle');
  }, []);

  const toggleMic = useCallback(() => {
    if (!mediaStreamRef.current) return;
    const track = mediaStreamRef.current.getAudioTracks()[0];
    if (track) {
      track.enabled = !track.enabled;
      setIsMicActive(track.enabled);
    }
  }, []);

  const value: VoiceAgentContextValue = {
    status,
    errorMessage,
    isMicActive,
    toggleMic,
    transcript,
    selections,
    isAnalyzingSite,
    userAmplitude,
    agentAmplitude,
    startConversation,
    endConversation,
    completedBrief,
    completedFormData,
  };

  return (
    <VoiceAgentContext.Provider value={value}>
      {children}
    </VoiceAgentContext.Provider>
  );
}