- Playback AudioContext created during startConversation (button click)
- Removed sampleRate constraint from getUserMedia (let browser choose)
- Added audio chunk logging for debugging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
470 lines
16 KiB
TypeScript
'use client';

import { createContext, useContext, useState, useRef, useCallback, type ReactNode } from 'react';
import type { WizardFormData } from './WizardContainer';

// ─── Types ───────────────────────────────────────────────────────────────────

export interface TranscriptEntry {
  role: 'user' | 'agent';
  text: string;
  timestamp: number;
}

type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';

interface VoiceAgentContextValue {
  status: ConnectionStatus;
  errorMessage: string | null;
  isMicActive: boolean;
  toggleMic: () => void;
  transcript: TranscriptEntry[];
  selections: Partial<WizardFormData>;
  isAnalyzingSite: boolean;
  userAmplitude: number;
  agentAmplitude: number;
  startConversation: () => Promise<void>;
  endConversation: () => void;
  completedBrief: string | null;
  completedFormData: WizardFormData | null;
}

// ─── Context ─────────────────────────────────────────────────────────────────

const VoiceAgentContext = createContext<VoiceAgentContextValue | null>(null);

export function useVoiceAgent() {
  const ctx = useContext(VoiceAgentContext);
  if (!ctx) throw new Error('useVoiceAgent must be used within VoiceAgentProvider');
  return ctx;
}
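
// Typical consumer usage (hypothetical sketch, for illustration only):
//   const { status, startConversation, endConversation, transcript } = useVoiceAgent();
//   if (status === 'idle') await startConversation();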

// ─── Audio Helpers ───────────────────────────────────────────────────────────
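// Gemini Live exchanges raw 16-bit PCM over the WebSocket: the worklet below
// uploads 16 kHz mono, and the server returns 24 kHz audio. These helpers convert
// between base64, Int16Array, and the Float32 samples the Web Audio API expects.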

function int16ToFloat32(int16: Int16Array): Float32Array {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}

function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}

function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

// ─── Audio Worklet Processor Code ────────────────────────────────────────────
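// Inlined as a string so it can be loaded from a Blob URL at runtime. The worklet
// runs off the main thread: it clamps each Float32 mic sample, converts it to
// 16-bit PCM, and posts a full 2048-sample chunk back through its MessagePort.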

const WORKLET_CODE = `
class AudioRecordingWorklet extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;

  process(inputs) {
    if (inputs[0].length) {
      const channel0 = inputs[0][0];
      for (let i = 0; i < channel0.length; i++) {
        const sample = Math.max(-1, Math.min(1, channel0[i]));
        this.buffer[this.bufferWriteIndex++] = sample * 32767;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            event: 'chunk',
            data: { int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer },
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}
registerProcessor('audio-recorder-worklet', AudioRecordingWorklet);
`;

// ─── Default Form Data (mirror WizardContainer) ─────────────────────────────

const DEFAULT_FORM_DATA: WizardFormData = {
  services: [],
  aiEnabled: false,
  aiTypes: [],
  industry: null,
  scope: '',
  timeline: null,
  name: '',
  company: '',
  email: '',
  phone: '',
  contactPreference: 'email',
  currentSiteUrl: '',
  currentSiteThoughts: '',
};

// ─── Provider Component ──────────────────────────────────────────────────────

interface VoiceAgentProviderProps {
  locale: string;
  children: ReactNode;
}

export default function VoiceAgentProvider({ locale, children }: VoiceAgentProviderProps) {
  const [status, setStatus] = useState<ConnectionStatus>('idle');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isMicActive, setIsMicActive] = useState(true);
  const [transcript, setTranscript] = useState<TranscriptEntry[]>([]);
  const [selections, setSelections] = useState<Partial<WizardFormData>>({});
  const [isAnalyzingSite, setIsAnalyzingSite] = useState(false);
  const [userAmplitude, setUserAmplitude] = useState(0);
  const [agentAmplitude, setAgentAmplitude] = useState(0);
  const [completedBrief, setCompletedBrief] = useState<string | null>(null);
  const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);

  const wsRef = useRef<WebSocket | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playbackContextRef = useRef<AudioContext | null>(null);
  const nextStartTimeRef = useRef(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animFrameRef = useRef<number>(0);

  const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
    setTranscript((prev) => [...prev, { role, text, timestamp: Date.now() }]);
  }, []);
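
  // Mic level meter: sample the analyser's time-domain data every animation frame
  // and publish its RMS so the UI can animate the user's waveform.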
  const trackAmplitude = useCallback(() => {
    if (!analyserRef.current) return;
    const data = new Uint8Array(analyserRef.current.fftSize);
    analyserRef.current.getByteTimeDomainData(data);
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
    setUserAmplitude(Math.sqrt(sum / data.length));
    animFrameRef.current = requestAnimationFrame(trackAmplitude);
  }, []);
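
  // Handle function calls issued by the model. Each branch returns a JSON string
  // that is sent back to Gemini as the tool response:
  // - update_selections: merge partial wizard answers into local state
  // - analyze_website:   proxy to /api/analyze-site and relay its summary
  // - complete_brief:    POST the assembled form data to /api/configure and store the brief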
  const handleToolCall = useCallback(
    async (name: string, args: Record<string, unknown>, callId: string) => {
      if (name === 'update_selections') {
        setSelections((prev) => ({ ...prev, ...(args as Partial<WizardFormData>) }));
        return JSON.stringify({ success: true });
      }

      if (name === 'analyze_website') {
        setIsAnalyzingSite(true);
        try {
          const res = await fetch('/api/analyze-site', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: args.url }),
          });
          const data = await res.json();
          setIsAnalyzingSite(false);
          return JSON.stringify(data);
        } catch {
          setIsAnalyzingSite(false);
          return JSON.stringify({ success: false, summary: "I wasn't able to analyze that site." });
        }
      }

      if (name === 'complete_brief') {
        try {
          const formData = { ...DEFAULT_FORM_DATA, ...(args as Partial<WizardFormData>), locale };
          const res = await fetch('/api/configure', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(formData),
          });
          const data = (await res.json()) as { success: boolean; brief?: string };
          if (data.success && data.brief) {
            setCompletedBrief(data.brief);
            setCompletedFormData(formData as WizardFormData);
          }
          return JSON.stringify(data);
        } catch {
          return JSON.stringify({ success: false, error: 'Brief generation failed' });
        }
      }

      return JSON.stringify({ error: `Unknown tool: ${name}` });
    },
    [locale],
  );
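
  // Decode a base64 PCM chunk and schedule it on the playback context.
  // nextStartTimeRef marks where the previous chunk ends, so consecutive chunks
  // play back-to-back; if playback has fallen behind, snap to ctx.currentTime.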
  const playAudioChunk = useCallback((base64Audio: string) => {
    if (!playbackContextRef.current) return;
    const ctx = playbackContextRef.current;
    const int16 = base64ToInt16(base64Audio);
    const float32 = int16ToFloat32(int16);
    const buffer = ctx.createBuffer(1, float32.length, 24000);
    buffer.copyToChannel(new Float32Array(float32), 0);
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    if (nextStartTimeRef.current < ctx.currentTime) {
      nextStartTimeRef.current = ctx.currentTime;
    }
    source.start(nextStartTimeRef.current);
    nextStartTimeRef.current += buffer.duration;

    const amplitude = Math.sqrt(float32.reduce((sum, v) => sum + v * v, 0) / float32.length);
    setAgentAmplitude(amplitude);
  }, []);
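
  // Connection flow, run from the user's button click:
  // 1. fetch the API key and session config from /api/gemini-token
  // 2. open the microphone and create both AudioContexts (the gesture matters on mobile)
  // 3. register the recorder worklet and wire mic -> analyser / worklet
  // 4. open the Gemini Live WebSocket, send setup, then stream PCM up and handle
  //    returned audio, transcripts, and tool calls as they arrive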
  const startConversation = useCallback(async () => {
    setStatus('connecting');
    setErrorMessage(null);
    setTranscript([]);
    setSelections({});
    setCompletedBrief(null);
    setCompletedFormData(null);

    try {
      const tokenRes = await fetch('/api/gemini-token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ locale }),
      });
      const tokenData = await tokenRes.json();
      if (!tokenData.success) throw new Error(`Token generation failed: ${tokenData.error ?? tokenRes.status}`);

      const { apiKey, model, config } = tokenData;

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, echoCancellation: true, noiseSuppression: true },
      });
      mediaStreamRef.current = stream;

      // Create AudioContexts during user gesture (required on mobile)
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;

      // Playback context MUST be created here (user gesture) for mobile
      playbackContextRef.current = new AudioContext({ sampleRate: 24000 });
      nextStartTimeRef.current = playbackContextRef.current.currentTime;
      const source = audioCtx.createMediaStreamSource(stream);

      const analyser = audioCtx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      analyserRef.current = analyser;

      // Register AudioWorklet
      const workletBlob = new Blob([WORKLET_CODE], { type: 'application/javascript' });
      const workletUrl = URL.createObjectURL(workletBlob);
      await audioCtx.audioWorklet.addModule(workletUrl);
      URL.revokeObjectURL(workletUrl);

      const workletNode = new AudioWorkletNode(audioCtx, 'audio-recorder-worklet');
      source.connect(workletNode);
      workletNode.connect(audioCtx.destination);

      // Open WebSocket to Gemini Live API
      const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
      console.log('[VoiceAgent] Connecting to WebSocket...');
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;

      // Timeout if setup doesn't complete within 10 seconds
      const setupTimeout = setTimeout(() => {
        if (ws.readyState !== WebSocket.CLOSED) {
          console.error('[VoiceAgent] Setup timed out after 10s');
          ws.close();
          setStatus('error');
          setErrorMessage('Connection timed out. Please try again.');
        }
      }, 10_000);
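
      // The first frame sent after the socket opens is the BidiGenerateContent
      // setup message; the session counts as live once the server replies with
      // setupComplete (handled in onmessage below).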
      ws.onopen = () => {
        console.log('[VoiceAgent] WebSocket opened, sending setup...');
        ws.send(JSON.stringify({
          setup: {
            model: `models/${model}`,
            generationConfig: {
              responseModalities: config.responseModalities,
              speechConfig: config.speechConfig,
            },
            systemInstruction: {
              parts: [{ text: config.systemInstruction }],
            },
            tools: config.tools,
          },
        }));
      };

      // Send audio chunks from worklet
      workletNode.port.onmessage = (event) => {
        if (event.data.event === 'chunk' && ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(event.data.data.int16arrayBuffer);
          ws.send(JSON.stringify({
            realtimeInput: {
              audio: {
                data: base64,
                mimeType: 'audio/pcm;rate=16000',
              },
            },
          }));
        }
      };

      ws.onmessage = async (event) => {
        let raw: string;
        if (event.data instanceof Blob) {
          raw = await event.data.text();
        } else {
          raw = event.data as string;
        }
        const msg = JSON.parse(raw);
        console.log('[VoiceAgent] Message:', JSON.stringify(msg).slice(0, 200));

        // Setup complete — Gemini sends back a setupComplete message
        if (msg.setupComplete !== undefined) {
          console.log('[VoiceAgent] Setup complete, session active');
          clearTimeout(setupTimeout);
          setStatus('active');
          trackAmplitude();
          // Prompt the agent to introduce itself
          ws.send(JSON.stringify({
            clientContent: {
              turns: [{ role: 'user', parts: [{ text: 'Hello, please introduce yourself.' }] }],
              turnComplete: true,
            },
          }));
          return;
        }

        // Server content (audio + text)
        if (msg.serverContent) {
          const parts = msg.serverContent.modelTurn?.parts;
          if (parts) {
            for (const part of parts) {
              if (part.inlineData) {
                console.log('[VoiceAgent] Audio chunk received, mime:', part.inlineData.mimeType, 'len:', part.inlineData.data?.length);
                playAudioChunk(part.inlineData.data);
              }
              if (part.text) {
                console.log('[VoiceAgent] Text:', part.text);
                addTranscript('agent', part.text);
              }
            }
          }
          // Input transcription
          if (msg.serverContent.inputTranscription?.text) {
            addTranscript('user', msg.serverContent.inputTranscription.text);
          }
          // Output transcription
          if (msg.serverContent.outputTranscription?.text) {
            addTranscript('agent', msg.serverContent.outputTranscription.text);
          }
        }

        // Tool call
        if (msg.toolCall) {
          const calls = msg.toolCall.functionCalls;
          if (calls) {
            const responses = [];
            for (const call of calls) {
              const result = await handleToolCall(call.name, call.args ?? {}, call.id);
              responses.push({ id: call.id, name: call.name, response: { result } });
            }
            ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
          }
        }
      };

      ws.onerror = (e) => {
        console.error('[VoiceAgent] WebSocket error:', e);
        setStatus('error');
        setErrorMessage('Connection error. Please try again.');
      };

      ws.onclose = (e) => {
        console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
        // Functional update avoids reading a stale `status` from this closure
        setStatus((prev) => (prev === 'active' ? 'idle' : prev));
      };
    } catch (error) {
      console.error('[VoiceAgent] Start failed:', error);
      setStatus('error');
      if (error instanceof DOMException && error.name === 'NotAllowedError') {
        setErrorMessage('Microphone access was denied.');
      } else {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        setErrorMessage(`Failed to start: ${msg}`);
      }
    }
  }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
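
  // Tear everything down: stop the amplitude loop, close the WebSocket, release
  // the microphone tracks, and close both AudioContexts before returning to idle.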
  const endConversation = useCallback(() => {
    setStatus('ending');
    cancelAnimationFrame(animFrameRef.current);

    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (audioContextRef.current) {
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    if (playbackContextRef.current) {
      void playbackContextRef.current.close();
      playbackContextRef.current = null;
    }

    setUserAmplitude(0);
    setAgentAmplitude(0);
    setStatus('idle');
  }, []);

  const toggleMic = useCallback(() => {
    if (!mediaStreamRef.current) return;
    const track = mediaStreamRef.current.getAudioTracks()[0];
    if (track) {
      track.enabled = !track.enabled;
      setIsMicActive(track.enabled);
    }
  }, []);

  const value: VoiceAgentContextValue = {
    status,
    errorMessage,
    isMicActive,
    toggleMic,
    transcript,
    selections,
    isAnalyzingSite,
    userAmplitude,
    agentAmplitude,
    startConversation,
    endConversation,
    completedBrief,
    completedFormData,
  };

  return (
    <VoiceAgentContext.Provider value={value}>
      {children}
    </VoiceAgentContext.Provider>
  );
}