import { requireSalesOrAdmin } from '~/server/utils/auth';
import { getNocoDbConfiguration } from '~/server/utils/nocodb';
import { logAuditEvent } from '~/server/utils/audit-logger';

/** A raw interest row as returned by NocoDB; only the fields read below are relied upon. */
type InterestRecord = Record<string, unknown>;

/** One per-field contribution to a pairwise similarity score. */
interface SimilarityComponent {
  type: string;
  score: number;
  weight: number;
}

/** Result of comparing two interest records. */
interface SimilarityResult {
  score: number;
  details: SimilarityComponent[];
}

/** A set of interest records believed to describe the same person. */
interface DuplicateGroup {
  id: string;
  interests: InterestRecord[];
  matchReason: string;
  confidence: number;
  masterCandidate: InterestRecord;
}

/** Similarity threshold used when the request does not supply a usable one. */
const DEFAULT_THRESHOLD = 0.8;
/** Look-back window in days used when the request does not supply a usable one (1 year). */
const DEFAULT_DATE_RANGE_DAYS = 365;
/** Interest table ID. */
const INTEREST_TABLE_ID = 'mbs9hjauug4eseo';
/** Number of records requested in a single batch for duplicate detection. */
const FETCH_LIMIT = 5000;
/** Score assigned when two records share an exact email or phone number. */
const EXACT_CONTACT_MATCH_SCORE = 0.95;
/** A normalized phone number must have at least this many digits to count as a match. */
const MIN_PHONE_DIGITS = 8;
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/**
 * Find groups of potentially duplicated interest records.
 *
 * Query parameters:
 * - `threshold`: minimum similarity score for two records to be grouped (default 0.8, clamped to [0, 1]).
 * - `dateRange`: look-back window in days (default 365); `0` or a negative value disables date filtering.
 *
 * Failures are reported in the response body as `{ success: false, error }` rather than
 * by throwing, so the caller must inspect `success`.
 */
export default defineEventHandler(async (event) => {
  console.log('[INTERESTS] Find duplicates request');

  try {
    // Require sales or admin access for duplicate detection
    await requireSalesOrAdmin(event);

    const query = getQuery(event);
    const threshold = parseThreshold(query.threshold);
    const dateRange = parseDateRange(query.dateRange);

    // Get all interests from NocoDB
    const config = getNocoDbConfiguration();
    let url = `${config.url}/api/v2/tables/${INTEREST_TABLE_ID}/records`;

    // Add date filtering if specified (include records without Created At)
    if (dateRange > 0) {
      const cutoffDate = new Date();
      cutoffDate.setDate(cutoffDate.getDate() - dateRange);

      // An extreme dateRange pushes the cutoff outside the representable Date range,
      // where toISOString() would throw; in that case fall back to no date filter.
      if (!Number.isNaN(cutoffDate.getTime())) {
        // Include records without Created At OR within date range
        const dateFilter = `((Created At,gte,${cutoffDate.toISOString()}),or,(Created At,is,null))`;
        url += `?where=${encodeURIComponent(dateFilter)}`;
      }
    }

    // NOTE(review): the server may cap the page size below FETCH_LIMIT; confirm the
    // full batch is actually returned, or paginate.
    const response = await $fetch<{ list?: InterestRecord[] }>(url, {
      headers: {
        'xc-token': config.token
      },
      params: {
        limit: FETCH_LIMIT // Get a large batch for duplicate detection
      }
    });

    const interests: InterestRecord[] = Array.isArray(response?.list) ? response.list : [];
    console.log('[INTERESTS] Analyzing', interests.length, 'interests for duplicates');

    // Find potential duplicates
    const duplicateGroups = findDuplicateInterests(interests, threshold);
    console.log('[INTERESTS] Found', duplicateGroups.length, 'duplicate groups');

    // Log the audit event
    await logAuditEvent(event, 'FIND_INTEREST_DUPLICATES', 'interest', {
      changes: {
        totalInterests: interests.length,
        duplicateGroups: duplicateGroups.length,
        threshold,
        dateRange
      }
    });

    return {
      success: true,
      data: {
        duplicateGroups,
        totalInterests: interests.length,
        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.interests.length, 0),
        threshold,
        dateRange
      }
    };
  } catch (error: unknown) {
    console.error('[INTERESTS] Failed to find duplicates:', error);

    const statusCode =
      typeof error === 'object' && error !== null
        ? (error as { statusCode?: unknown }).statusCode
        : undefined;

    if (statusCode === 403) {
      return {
        success: false,
        error: 'Insufficient permissions. Sales or admin access required.'
      };
    }

    return {
      success: false,
      error: 'Failed to find duplicates'
    };
  }
});

/**
 * Reduce a raw query value to a single non-empty string.
 * Repeated parameters arrive as arrays; the first occurrence wins.
 */
function readQueryString(value: unknown): string | undefined {
  const first: unknown = Array.isArray(value) ? value[0] : value;
  if (first === undefined || first === null || first === '') return undefined;
  return String(first);
}

/**
 * Parse the `threshold` query parameter.
 * Missing or non-numeric input falls back to the default: a NaN threshold would make
 * every `score >= threshold` comparison false and silently report no duplicates.
 */
function parseThreshold(value: unknown): number {
  const raw = readQueryString(value);
  if (raw === undefined) return DEFAULT_THRESHOLD;

  const parsed = Number.parseFloat(raw);
  if (!Number.isFinite(parsed)) return DEFAULT_THRESHOLD;

  // Similarity scores are always within [0, 1]
  return Math.min(1, Math.max(0, parsed));
}

/**
 * Parse the `dateRange` query parameter (whole days).
 * Missing or non-numeric input falls back to the default window.
 */
function parseDateRange(value: unknown): number {
  const raw = readQueryString(value);
  if (raw === undefined) return DEFAULT_DATE_RANGE_DAYS;

  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? DEFAULT_DATE_RANGE_DAYS : parsed;
}

/**
 * Read a record field as trimmed text.
 * Falsy values (missing, null, empty, 0) and whitespace-only values yield '' so that
 * blank fields are never treated as present, let alone as matching each other.
 */
function fieldText(interest: InterestRecord, field: string): string {
  const value = interest[field];
  return value ? String(value).trim() : '';
}

/**
 * Find duplicate interests based on multiple criteria.
 *
 * Each not-yet-grouped record seeds a group and is compared against every later
 * ungrouped record; those scoring at or above `threshold` join the group. A record
 * belongs to at most one group.
 *
 * @param interests - Records to analyze.
 * @param threshold - Minimum similarity score (0..1) for two records to be grouped.
 * @returns One entry per group containing at least two records.
 */
function findDuplicateInterests(
  interests: InterestRecord[],
  threshold: number = DEFAULT_THRESHOLD
): DuplicateGroup[] {
  console.log('[INTERESTS] Starting duplicate detection with threshold:', threshold);
  console.log('[INTERESTS] Total interests to analyze:', interests.length);

  const duplicateGroups: DuplicateGroup[] = [];
  const processedIds = new Set<unknown>();
  let comparisons = 0;

  for (let i = 0; i < interests.length; i++) {
    const interest1 = interests[i];
    if (processedIds.has(interest1.Id)) continue;

    const matches: InterestRecord[] = [interest1];

    for (let j = i + 1; j < interests.length; j++) {
      const interest2 = interests[j];
      if (processedIds.has(interest2.Id)) continue;

      // No per-comparison logging here: this loop is O(n^2) and the compared fields
      // are personal data.
      const similarity = calculateSimilarity(interest1, interest2);
      comparisons++;

      if (similarity.score >= threshold) {
        console.log(`[INTERESTS] MATCH FOUND! ${interest1.Id} vs ${interest2.Id} (score: ${similarity.score.toFixed(3)})`);
        console.log('[INTERESTS] Match details:', similarity.details);
        matches.push(interest2);
        processedIds.add(interest2.Id);
      }
    }

    if (matches.length > 1) {
      console.log(`[INTERESTS] Creating duplicate group with ${matches.length} matches`);

      // Mark all as processed (including the seed record)
      matches.forEach((match) => processedIds.add(match.Id));

      // Determine the best master candidate (most complete record)
      const masterCandidate = selectMasterCandidate(matches);

      // Average the master's similarity to every OTHER member. The master is not
      // necessarily matches[0], so it is excluded by identity rather than position.
      const others = matches.filter((match) => match !== masterCandidate);
      const confidence =
        others.length > 0
          ? others.reduce(
              (sum, match) => sum + calculateSimilarity(masterCandidate, match).score,
              0
            ) / others.length
          : 1;

      duplicateGroups.push({
        id: `group_${duplicateGroups.length + 1}`,
        interests: matches,
        matchReason: generateMatchReason(matches),
        confidence,
        masterCandidate
      });
    }
  }

  console.log(`[INTERESTS] Completed ${comparisons} comparisons, found ${duplicateGroups.length} duplicate groups`);
  return duplicateGroups;
}

/**
 * Calculate similarity between two interests.
 *
 * Only fields present on BOTH records contribute. An exact email or phone match
 * short-circuits to a fixed high score; otherwise the result is the weighted average
 * of the contributing field scores (0 when no field is comparable).
 *
 * @returns The overall score (0..1) and the per-field components behind it.
 */
function calculateSimilarity(interest1: InterestRecord, interest2: InterestRecord): SimilarityResult {
  const scores: SimilarityComponent[] = [];

  // Email similarity (highest weight) - exact match required
  const email1 = fieldText(interest1, 'Email Address');
  const email2 = fieldText(interest2, 'Email Address');
  if (email1 && email2) {
    const emailScore = normalizeEmail(email1) === normalizeEmail(email2) ? 1.0 : 0.0;
    scores.push({ type: 'email', score: emailScore, weight: 0.5 });
  }

  // Phone similarity - exact match on normalized numbers
  const phone1 = fieldText(interest1, 'Phone Number');
  const phone2 = fieldText(interest2, 'Phone Number');
  if (phone1 && phone2) {
    const digits1 = normalizePhone(phone1);
    const digits2 = normalizePhone(phone2);
    // Require a minimum digit count so short or placeholder values never match
    const phoneScore = digits1 === digits2 && digits1.length >= MIN_PHONE_DIGITS ? 1.0 : 0.0;
    scores.push({ type: 'phone', score: phoneScore, weight: 0.4 });
  }

  // Name similarity - fuzzy matching
  const name1 = fieldText(interest1, 'Full Name');
  const name2 = fieldText(interest2, 'Full Name');
  if (name1 && name2) {
    scores.push({ type: 'name', score: calculateNameSimilarity(name1, name2), weight: 0.3 });
  }

  // Address similarity
  const address1 = fieldText(interest1, 'Address');
  const address2 = fieldText(interest2, 'Address');
  if (address1 && address2) {
    scores.push({ type: 'address', score: calculateStringSimilarity(address1, address2), weight: 0.2 });
  }

  // Special case: an exact email OR phone match gives a high score regardless of other fields
  const hasExactContactMatch = scores.some(
    (s) => (s.type === 'email' || s.type === 'phone') && s.score === 1.0
  );
  if (hasExactContactMatch) {
    return { score: EXACT_CONTACT_MATCH_SCORE, details: scores };
  }

  // Weighted average for other cases; `|| 1` avoids dividing by zero when nothing was comparable
  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
  const weightedScore = scores.reduce((sum, s) => sum + s.score * s.weight, 0) / (totalWeight || 1);

  return { score: weightedScore, details: scores };
}

/**
 * Normalize email for comparison (lower-cased, surrounding whitespace removed).
 */
function normalizeEmail(email: string): string {
  return String(email).toLowerCase().trim();
}

/**
 * Normalize phone number for comparison by removing all non-digits.
 */
function normalizePhone(phone: string): string {
  return String(phone).replace(/\D/g, '');
}

/**
 * Calculate name similarity using Levenshtein distance.
 * Names are compared exactly like any other free-text field.
 */
function calculateNameSimilarity(name1: string, name2: string): number {
  return calculateStringSimilarity(name1, name2);
}

/**
 * Calculate string similarity using Levenshtein distance.
 *
 * Comparison is case-insensitive and ignores surrounding whitespace.
 *
 * @returns 1 for identical strings, otherwise `1 - distance / longerLength`.
 */
function calculateStringSimilarity(str1: string, str2: string): number {
  const s1 = String(str1).toLowerCase().trim();
  const s2 = String(str2).toLowerCase().trim();

  if (s1 === s2) return 1.0;

  const distance = levenshteinDistance(s1, s2);
  const maxLength = Math.max(s1.length, s2.length);

  return maxLength > 0 ? 1 - distance / maxLength : 0;
}

/**
 * Calculate Levenshtein distance between two strings.
 *
 * Keeps only two rows of the edit-distance table instead of the full matrix, since
 * each row depends solely on the one before it.
 */
function levenshteinDistance(str1: string, str2: string): number {
  if (str1 === str2) return 0;
  if (str1.length === 0) return str2.length;
  if (str2.length === 0) return str1.length;

  // previous[i] = distance between the first i chars of str1 and the first j-1 chars of str2
  let previous: number[] = Array.from({ length: str1.length + 1 }, (_, i) => i);
  let current: number[] = new Array<number>(str1.length + 1).fill(0);

  for (let j = 1; j <= str2.length; j += 1) {
    current[0] = j;

    for (let i = 1; i <= str1.length; i += 1) {
      const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      current[i] = Math.min(
        current[i - 1] + 1, // deletion
        previous[i] + 1, // insertion
        previous[i - 1] + substitutionCost // substitution
      );
    }

    [previous, current] = [current, previous];
  }

  // After the final swap the last computed row is in `previous`
  return previous[str1.length];
}

/**
 * Select the best master candidate from a group of duplicates.
 *
 * The record with the highest completeness score wins; ties keep the earliest record.
 * Throws a TypeError when `interests` is empty.
 */
function selectMasterCandidate(interests: InterestRecord[]): InterestRecord {
  // Score each record once instead of re-scoring the running best on every step
  const scored = interests.map((record) => ({
    record,
    score: calculateCompletenessScore(record)
  }));

  return scored.reduce((best, current) => (current.score > best.score ? current : best)).record;
}

/**
 * Calculate completeness score for an interest record.
 *
 * @returns The fraction of key fields that are filled in, plus a small bonus for
 * recently created records (so the result can exceed 1).
 */
function calculateCompletenessScore(interest: InterestRecord): number {
  const fields = ['Full Name', 'Email Address', 'Phone Number', 'Address', 'Extra Comments', 'Berth Size Desired'];
  const filledCount = fields.filter((field) => fieldText(interest, field).length > 0).length;

  let score = filledCount / fields.length;

  // Bonus for recent creation
  const createdAt = interest['Created At'];
  if (createdAt) {
    const created = new Date(createdAt as string | number | Date);
    const daysOld = (Date.now() - created.getTime()) / MS_PER_DAY;

    // More recent records get a small bonus. An unparseable date yields NaN, which
    // fails both comparisons and therefore earns no bonus.
    if (daysOld < 30) score += 0.1;
    else if (daysOld < 90) score += 0.05;
  }

  return score;
}

/**
 * Generate a descriptive match reason.
 *
 * Reports a shared email / phone only when at least two records carry that field and
 * all of the values present are identical after normalization.
 */
function generateMatchReason(interests: InterestRecord[]): string {
  const reasons: string[] = [];

  // Check for exact email matches
  const emails = interests.map((i) => fieldText(i, 'Email Address')).filter(Boolean);
  if (emails.length > 1 && new Set(emails.map((e) => normalizeEmail(e))).size === 1) {
    reasons.push('Same email address');
  }

  // Check for exact phone matches
  const phones = interests.map((i) => fieldText(i, 'Phone Number')).filter(Boolean);
  if (phones.length > 1 && new Set(phones.map((p) => normalizePhone(p))).size === 1) {
    reasons.push('Same phone number');
  }

  // Check for similar names
  const names = interests.map((i) => fieldText(i, 'Full Name')).filter(Boolean);
  if (names.length > 1) {
    const normalizedNames = names.map((n) => n.toLowerCase().trim());
    if (new Set(normalizedNames).size === 1) {
      reasons.push('Same name');
    } else {
      reasons.push('Similar names');
    }
  }

  return reasons.length > 0 ? reasons.join(', ') : 'Multiple matching criteria';
}