feat: add contact card, deferred tool responses, and reconnection logic

- request_contact tool shows on-screen card for name/email verification
- Deferred tool responses let the UI wait for user confirmation
- WebSocket close preserves transcript and enables reconnection
- Reconnect seeds new Gemini session with prior conversation context

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-06 14:43:27 -04:00
parent 28d063e251
commit cdb89553e0

View File

@@ -1,6 +1,6 @@
'use client'; 'use client';
import { createContext, useContext, useState, useRef, useCallback, type ReactNode } from 'react'; import { createContext, useContext, useState, useRef, useCallback, useEffect, type ReactNode } from 'react';
import type { WizardFormData } from './WizardContainer'; import type { WizardFormData } from './WizardContainer';
// ─── Types ─────────────────────────────────────────────────────────────────── // ─── Types ───────────────────────────────────────────────────────────────────
@@ -13,6 +13,11 @@ export interface TranscriptEntry {
type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error'; type ConnectionStatus = 'idle' | 'connecting' | 'active' | 'ending' | 'error';
/**
 * Contact details proposed by the agent's `request_contact` tool call and held
 * for verification on the on-screen contact card before the brief is submitted.
 */
export interface PendingContact {
/** Contact name; can be edited through `updatePendingContact('name', …)`. */
name: string;
/** Contact email; can be edited through `updatePendingContact('email', …)`. */
email: string;
}
interface VoiceAgentContextValue { interface VoiceAgentContextValue {
status: ConnectionStatus; status: ConnectionStatus;
errorMessage: string | null; errorMessage: string | null;
@@ -28,6 +33,11 @@ interface VoiceAgentContextValue {
endConversation: () => void; endConversation: () => void;
completedBrief: string | null; completedBrief: string | null;
completedFormData: WizardFormData | null; completedFormData: WizardFormData | null;
pendingContact: PendingContact | null;
confirmContact: () => void;
updatePendingContact: (field: 'name' | 'email', value: string) => void;
canReconnect: boolean;
reconnect: () => Promise<void>;
} }
// ─── Context ───────────────────────────────────────────────────────────────── // ─── Context ─────────────────────────────────────────────────────────────────
@@ -133,9 +143,15 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const [agentAmplitude, setAgentAmplitude] = useState(0); const [agentAmplitude, setAgentAmplitude] = useState(0);
const [completedBrief, setCompletedBrief] = useState<string | null>(null); const [completedBrief, setCompletedBrief] = useState<string | null>(null);
const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null); const [completedFormData, setCompletedFormData] = useState<WizardFormData | null>(null);
const [pendingContact, setPendingContact] = useState<PendingContact | null>(null);
const [canReconnect, setCanReconnect] = useState(false);
const turnCompleteRef = useRef(true); const turnCompleteRef = useRef(true);
const briefSubmittedRef = useRef(false); const briefSubmittedRef = useRef(false);
const pendingContactRef = useRef<PendingContact | null>(null);
const pendingContactCallIdRef = useRef('');
const reconnectTranscriptRef = useRef<TranscriptEntry[]>([]);
const statusRef = useRef<ConnectionStatus>('idle');
const wsRef = useRef<WebSocket | null>(null); const wsRef = useRef<WebSocket | null>(null);
const mediaStreamRef = useRef<MediaStream | null>(null); const mediaStreamRef = useRef<MediaStream | null>(null);
const audioContextRef = useRef<AudioContext | null>(null); const audioContextRef = useRef<AudioContext | null>(null);
@@ -144,6 +160,9 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const analyserRef = useRef<AnalyserNode | null>(null); const analyserRef = useRef<AnalyserNode | null>(null);
const animFrameRef = useRef<number>(0); const animFrameRef = useRef<number>(0);
// Keep statusRef in sync for use in closures
useEffect(() => { statusRef.current = status; }, [status]);
const addTranscript = useCallback((role: 'user' | 'agent', text: string) => { const addTranscript = useCallback((role: 'user' | 'agent', text: string) => {
setTranscript((prev) => { setTranscript((prev) => {
const last = prev[prev.length - 1]; const last = prev[prev.length - 1];
@@ -193,6 +212,16 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
} }
} }
if (name === 'request_contact') {
const { name: contactName, email: contactEmail } = args as { name: string; email: string };
const contact = { name: contactName, email: contactEmail };
setPendingContact(contact);
pendingContactRef.current = contact;
pendingContactCallIdRef.current = callId;
// Don't return a tool response yet — wait for user confirmation via confirmContact()
return '__DEFERRED__';
}
if (name === 'complete_brief') { if (name === 'complete_brief') {
// Prevent duplicate submissions // Prevent duplicate submissions
if (briefSubmittedRef.current) return JSON.stringify({ success: true, message: 'Brief already submitted' }); if (briefSubmittedRef.current) return JSON.stringify({ success: true, message: 'Brief already submitted' });
@@ -204,7 +233,10 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const summary = toolArgs.conversationSummary ?? ''; const summary = toolArgs.conversationSummary ?? '';
const existingScope = toolArgs.scope ?? ''; const existingScope = toolArgs.scope ?? '';
const combinedScope = [existingScope, summary].filter(Boolean).join('\n\n'); const combinedScope = [existingScope, summary].filter(Boolean).join('\n\n');
const formData = { ...DEFAULT_FORM_DATA, ...toolArgs, scope: combinedScope, locale }; // Use confirmed contact details from the on-screen card if available
const contactName = pendingContactRef.current?.name ?? toolArgs.name ?? '';
const contactEmail = pendingContactRef.current?.email ?? toolArgs.email ?? '';
const formData = { ...DEFAULT_FORM_DATA, ...toolArgs, name: contactName, email: contactEmail, scope: combinedScope, locale };
delete (formData as Record<string, unknown>).conversationSummary; delete (formData as Record<string, unknown>).conversationSummary;
const res = await fetch('/api/configure', { const res = await fetch('/api/configure', {
method: 'POST', method: 'POST',
@@ -254,8 +286,15 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const startConversation = useCallback(async () => { const startConversation = useCallback(async () => {
setStatus('connecting'); setStatus('connecting');
setErrorMessage(null); setErrorMessage(null);
setTranscript([]); setCanReconnect(false);
setSelections({}); // Only reset transcript/selections on fresh start (not reconnect)
if (reconnectTranscriptRef.current.length === 0) {
setTranscript([]);
setSelections({});
setPendingContact(null);
pendingContactRef.current = null;
pendingContactCallIdRef.current = '';
}
setCompletedBrief(null); setCompletedBrief(null);
setCompletedFormData(null); setCompletedFormData(null);
briefSubmittedRef.current = false; briefSubmittedRef.current = false;
@@ -364,12 +403,27 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
clearTimeout(setupTimeout); clearTimeout(setupTimeout);
setStatus('active'); setStatus('active');
trackAmplitude(); trackAmplitude();
// Prompt the agent to introduce itself
ws.send(JSON.stringify({ // If reconnecting, seed with prior conversation context
realtimeInput: { const priorTranscript = reconnectTranscriptRef.current;
text: 'Hello, please introduce yourself.', if (priorTranscript.length > 0) {
}, const summary = priorTranscript
})); .map((e) => `${e.role === 'user' ? 'User' : 'Agent'}: ${e.text}`)
.join('\n');
ws.send(JSON.stringify({
realtimeInput: {
text: `We were having a conversation but got disconnected. Here is what was discussed so far:\n\n${summary}\n\nPlease acknowledge the reconnection briefly and continue where we left off.`,
},
}));
reconnectTranscriptRef.current = [];
} else {
// Prompt the agent to introduce itself
ws.send(JSON.stringify({
realtimeInput: {
text: 'Hello, please introduce yourself.',
},
}));
}
return; return;
} }
@@ -409,9 +463,13 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
const responses = []; const responses = [];
for (const call of calls) { for (const call of calls) {
const result = await handleToolCall(call.name, call.args ?? {}, call.id); const result = await handleToolCall(call.name, call.args ?? {}, call.id);
responses.push({ id: call.id, name: call.name, response: { result } }); if (result !== '__DEFERRED__') {
responses.push({ id: call.id, name: call.name, response: { result } });
}
}
if (responses.length > 0) {
ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
} }
ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }));
} }
} }
}; };
@@ -424,8 +482,28 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
ws.onclose = (e) => { ws.onclose = (e) => {
console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason); console.log('[VoiceAgent] WebSocket closed:', e.code, e.reason);
if (status === 'active') { // Clean up audio but preserve transcript and selections
setStatus('idle'); cancelAnimationFrame(animFrameRef.current);
if (mediaStreamRef.current) {
mediaStreamRef.current.getTracks().forEach((track) => track.stop());
mediaStreamRef.current = null;
}
if (audioContextRef.current) {
void audioContextRef.current.close();
audioContextRef.current = null;
}
if (playbackContextRef.current) {
void playbackContextRef.current.close();
playbackContextRef.current = null;
}
wsRef.current = null;
setUserAmplitude(0);
setAgentAmplitude(0);
// If we weren't intentionally ending, allow reconnect
if (statusRef.current !== 'ending' && !briefSubmittedRef.current) {
setStatus('error');
setErrorMessage(null);
setCanReconnect(true);
} }
}; };
} catch (error) { } catch (error) {
@@ -438,7 +516,7 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
setErrorMessage(`Failed to start: ${msg}`); setErrorMessage(`Failed to start: ${msg}`);
} }
} }
}, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript, status]); }, [locale, trackAmplitude, handleToolCall, playAudioChunk, addTranscript]);
const endConversation = useCallback(() => { const endConversation = useCallback(() => {
setStatus('ending'); setStatus('ending');
@@ -463,9 +541,46 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
setUserAmplitude(0); setUserAmplitude(0);
setAgentAmplitude(0); setAgentAmplitude(0);
setCanReconnect(false);
reconnectTranscriptRef.current = [];
pendingContactCallIdRef.current = '';
setStatus('idle'); setStatus('idle');
}, []); }, []);
/**
 * Applies a user edit to one field of the pending contact.
 *
 * The ref and the React state are written together, outside of any state
 * updater function: React expects updaters to be pure and may run them more
 * than once or later than the call, which would leave `pendingContactRef`
 * lagging behind the edit that closures such as `confirmContact` read.
 *
 * @param field - Which contact field to overwrite.
 * @param value - The new value for that field.
 */
const updatePendingContact = useCallback((field: 'name' | 'email', value: string) => {
  const current = pendingContactRef.current;
  // No contact has been requested yet (or it was reset) — nothing to edit.
  if (!current) return;
  const updated = { ...current, [field]: value };
  pendingContactRef.current = updated;
  setPendingContact(updated);
}, []);
/**
 * Resolves the deferred `request_contact` tool call with the contact details
 * currently held in `pendingContactRef`.
 *
 * Does nothing when there is no pending contact or no outstanding call id, so
 * a repeated confirm (or a confirm after the id has been cleared) never sends
 * a tool response with an empty id. The call id is consumed even if the socket
 * is not open, matching the previous behaviour of clearing it unconditionally
 * once a confirmation was attempted.
 */
const confirmContact = useCallback(() => {
  const contact = pendingContactRef.current;
  const callId = pendingContactCallIdRef.current;
  if (!contact || !callId) return;

  // Consume the id first so this call can only ever be answered once.
  pendingContactCallIdRef.current = '';

  const socket = wsRef.current;
  if (!socket || socket.readyState !== WebSocket.OPEN) return;

  // Send confirmation back through the WebSocket so the agent knows.
  socket.send(JSON.stringify({
    toolResponse: {
      functionResponses: [{
        id: callId,
        name: 'request_contact',
        response: { result: JSON.stringify({ confirmed: true, name: contact.name, email: contact.email }) },
      }],
    },
  }));
}, []);
/**
 * Starts a new session after an unexpected disconnect, carrying the existing
 * conversation over.
 *
 * The current transcript is stashed in `reconnectTranscriptRef` before
 * `startConversation` runs; a non-empty ref tells `startConversation` to keep
 * the transcript and selections and to seed the new session with the prior
 * exchange instead of asking the agent to introduce itself.
 */
const reconnect = useCallback(async () => {
  // Hand the transcript to the next session so it can pick up the context.
  reconnectTranscriptRef.current = transcript;
  setErrorMessage(null);
  setCanReconnect(false);
  await startConversation();
}, [startConversation, transcript]);
const toggleMic = useCallback(() => { const toggleMic = useCallback(() => {
if (!mediaStreamRef.current) return; if (!mediaStreamRef.current) return;
const track = mediaStreamRef.current.getAudioTracks()[0]; const track = mediaStreamRef.current.getAudioTracks()[0];
@@ -490,6 +605,11 @@ export default function VoiceAgentProvider({ locale, children }: VoiceAgentProvi
endConversation, endConversation,
completedBrief, completedBrief,
completedFormData, completedFormData,
pendingContact,
confirmContact,
updatePendingContact,
canReconnect,
reconnect,
}; };
return ( return (