import { requireAuth, requireSalesOrAdmin } from '~/server/utils/auth';
import { getNocoDbConfiguration } from '~/server/utils/nocodb';

/** A raw Interest row; keys are the column titles used in this file. */
type InterestRecord = Record<string, unknown>;

/** Score contributed by one comparison criterion (email, phone, name, address). */
interface SimilarityDetail {
  type: string;
  score: number;
  weight: number;
}

/** Weighted similarity of two records plus the per-criterion breakdown. */
interface SimilarityResult {
  score: number;
  details: SimilarityDetail[];
}

/** A set of records considered duplicates of one another. */
interface DuplicateGroup {
  id: string;
  interests: InterestRecord[];
  matchReason: string;
  confidence: number;
  masterCandidate: InterestRecord;
}

/** Similarity threshold used when the request does not supply a usable one. */
const DEFAULT_DUPLICATE_THRESHOLD = 0.8;

/** Upper bound on the number of records analysed in one request. */
const MAX_INTERESTS_TO_ANALYZE = 5000;

/** Interest table ID from nocodb.ts */
const INTEREST_TABLE_ID = 'mbs9hjauug4eseo';

/**
 * GET handler: scans the Interest table for likely duplicate records.
 *
 * Query parameters:
 * - `threshold` (optional): minimum weighted similarity, 0..1, for two records
 *   to be grouped. Defaults to 0.8 when missing or not a number.
 *
 * Always resolves with `{ success, ... }`; failures are reported in the body
 * rather than thrown.
 */
export default defineEventHandler(async (event) => {
  console.log('[DUPLICATES] Find duplicates request');

  try {
    // Require sales or admin access for duplicate detection
    await requireSalesOrAdmin(event);

    const threshold = parseThreshold(getQuery(event).threshold);

    const interests = await fetchInterests();
    console.log('[ADMIN] Analyzing', interests.length, 'interests for duplicates');

    const duplicateGroups = findDuplicateInterests(interests, threshold);
    console.log('[ADMIN] Found', duplicateGroups.length, 'duplicate groups');

    return {
      success: true,
      data: {
        duplicateGroups,
        totalInterests: interests.length,
        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.interests.length, 0),
        threshold
      }
    };
  } catch (error: unknown) {
    console.error('[ADMIN] Failed to find duplicates:', error);

    if (readStatusCode(error) === 403) {
      // NOTE(review): the guard above also admits sales users, so this wording
      // is narrower than the actual rule. Kept verbatim in case clients match on it.
      return {
        success: false,
        error: 'Insufficient permissions. Admin access required.'
      };
    }

    return {
      success: false,
      error: 'Failed to find duplicates'
    };
  }
});

/**
 * Turn the raw `threshold` query value into a usable number.
 *
 * A value that does not parse to a finite number falls back to the default
 * instead of becoming NaN (every `score >= NaN` test is false, which would
 * silently report "no duplicates"). Parsed values are clamped to [0, 1], the
 * range of the similarity score.
 */
function parseThreshold(raw: unknown): number {
  // A repeated query parameter arrives as an array; use its first value.
  const value: unknown = Array.isArray(raw) ? raw[0] : raw;
  if (value === undefined || value === null || value === '') {
    return DEFAULT_DUPLICATE_THRESHOLD;
  }

  const parsed = typeof value === 'number' ? value : parseFloat(String(value));
  if (!Number.isFinite(parsed)) {
    return DEFAULT_DUPLICATE_THRESHOLD;
  }
  return Math.min(1, Math.max(0, parsed));
}

/**
 * Load up to MAX_INTERESTS_TO_ANALYZE rows from the Interest table.
 *
 * The first request asks for the whole batch. If the response reports that it
 * was not the last page (the server may return fewer rows than requested),
 * further pages are requested by offset. A response without `pageInfo` is
 * treated as complete.
 */
async function fetchInterests(): Promise<InterestRecord[]> {
  const config = getNocoDbConfiguration();
  const endpoint = `${config.url}/api/v2/tables/${INTEREST_TABLE_ID}/records`;
  const collected: InterestRecord[] = [];

  while (collected.length < MAX_INTERESTS_TO_ANALYZE) {
    const params: Record<string, number> = {
      limit: MAX_INTERESTS_TO_ANALYZE - collected.length
    };
    if (collected.length > 0) {
      params.offset = collected.length;
    }

    const page = (await $fetch(endpoint, {
      headers: { 'xc-token': config.token },
      params
    })) as { list?: InterestRecord[]; pageInfo?: { isLastPage?: boolean } };

    const rows = page.list ?? [];
    collected.push(...rows);

    // Stop on an empty page or unless the server explicitly says more pages exist.
    if (rows.length === 0 || page.pageInfo?.isLastPage !== false) {
      break;
    }
  }

  return collected;
}

/**
 * Extract `statusCode` from a thrown value, if it has one.
 */
function readStatusCode(error: unknown): unknown {
  if (typeof error === 'object' && error !== null && 'statusCode' in error) {
    return (error as { statusCode: unknown }).statusCode;
  }
  return undefined;
}

/**
 * Read a column as trimmed text. Strings and numbers are accepted; anything
 * else (missing, null, objects) yields the empty string.
 */
function readTextField(record: InterestRecord, field: string): string {
  const value = record[field];
  if (typeof value === 'string') return value.trim();
  if (typeof value === 'number') return String(value);
  return '';
}

/**
 * Find duplicate interests based on multiple criteria.
 *
 * Each record that is not yet part of a group is compared against every later
 * ungrouped record; those scoring at or above `threshold` join its group.
 *
 * @param interests Records to analyse.
 * @param threshold Minimum weighted similarity (see calculateSimilarity).
 * @returns One entry per group containing two or more records.
 */
function findDuplicateInterests(
  interests: InterestRecord[],
  threshold: number = DEFAULT_DUPLICATE_THRESHOLD
): DuplicateGroup[] {
  const duplicateGroups: DuplicateGroup[] = [];
  // Tracked by position rather than by `Id`, so rows lacking an Id are not
  // all treated as the same already-processed record.
  const groupedIndexes = new Set<number>();

  for (let i = 0; i < interests.length; i++) {
    if (groupedIndexes.has(i)) continue;

    const anchor = interests[i];
    const matches: InterestRecord[] = [anchor];

    for (let j = i + 1; j < interests.length; j++) {
      if (groupedIndexes.has(j)) continue;

      const candidate = interests[j];
      if (calculateSimilarity(anchor, candidate).score >= threshold) {
        matches.push(candidate);
        groupedIndexes.add(j);
      }
    }

    if (matches.length < 2) continue;
    groupedIndexes.add(i);

    // Determine the best master candidate (most complete record)
    const masterCandidate = selectMasterCandidate(matches);

    // Confidence is the best similarity between the master and any OTHER
    // member. The master is excluded by position; comparing it with itself
    // would make the confidence a meaningless 1.0.
    const masterIndex = matches.indexOf(masterCandidate);
    const confidence = Math.max(
      ...matches
        .filter((_, index) => index !== masterIndex)
        .map((member) => calculateSimilarity(masterCandidate, member).score)
    );

    duplicateGroups.push({
      id: `group_${duplicateGroups.length + 1}`,
      interests: matches,
      matchReason: 'Multiple matching criteria',
      confidence,
      masterCandidate
    });
  }

  return duplicateGroups;
}

/**
 * Calculate similarity between two interests.
 *
 * A criterion is scored only when both records have a usable value for it.
 * The result is the weighted average over the scored criteria:
 * email 0.4 (exact, case-insensitive), phone 0.3 (exact on digits),
 * name 0.2 and address 0.1 (edit-distance ratio). When no criterion can be
 * scored the result is 0.
 */
function calculateSimilarity(interest1: InterestRecord, interest2: InterestRecord): SimilarityResult {
  const scores: SimilarityDetail[] = [];

  // Email similarity (highest weight)
  const email1 = readTextField(interest1, 'Email Address').toLowerCase();
  const email2 = readTextField(interest2, 'Email Address').toLowerCase();
  if (email1 && email2) {
    scores.push({ type: 'email', score: email1 === email2 ? 1.0 : 0.0, weight: 0.4 });
  }

  // Phone similarity. Values without any digits normalize to '' and are
  // skipped, so two placeholders such as "N/A" never count as a phone match.
  const phone1 = normalizePhone(readTextField(interest1, 'Phone Number'));
  const phone2 = normalizePhone(readTextField(interest2, 'Phone Number'));
  if (phone1 && phone2) {
    scores.push({ type: 'phone', score: phone1 === phone2 ? 1.0 : 0.0, weight: 0.3 });
  }

  // Name similarity
  const name1 = readTextField(interest1, 'Full Name');
  const name2 = readTextField(interest2, 'Full Name');
  if (name1 && name2) {
    scores.push({ type: 'name', score: calculateNameSimilarity(name1, name2), weight: 0.2 });
  }

  // Address similarity
  const address1 = readTextField(interest1, 'Address');
  const address2 = readTextField(interest2, 'Address');
  if (address1 && address2) {
    scores.push({ type: 'address', score: calculateStringSimilarity(address1, address2), weight: 0.1 });
  }

  // Calculate weighted average; `|| 1` avoids 0/0 when nothing was scored.
  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
  const weightedScore = scores.reduce((sum, s) => sum + s.score * s.weight, 0) / (totalWeight || 1);

  return {
    score: weightedScore,
    details: scores
  };
}

/**
 * Normalize phone number for comparison by removing every non-digit.
 */
function normalizePhone(phone: string): string {
  return phone.replace(/\D/g, '');
}

/**
 * Calculate name similarity using Levenshtein distance.
 * Same metric as calculateStringSimilarity; kept as a separate entry point.
 */
function calculateNameSimilarity(name1: string, name2: string): number {
  return calculateStringSimilarity(name1, name2);
}

/**
 * Calculate string similarity using Levenshtein distance.
 *
 * Inputs are lower-cased and trimmed first.
 *
 * @returns 1 for equal strings, otherwise `1 - distance / longerLength`.
 */
function calculateStringSimilarity(str1: string, str2: string): number {
  const s1 = str1.toLowerCase().trim();
  const s2 = str2.toLowerCase().trim();

  if (s1 === s2) return 1.0;

  const maxLength = Math.max(s1.length, s2.length);
  return maxLength > 0 ? 1 - levenshteinDistance(s1, s2) / maxLength : 0;
}

/**
 * Calculate Levenshtein distance between two strings.
 *
 * Only two rows of the edit table are kept at a time, since each cell depends
 * solely on the current and previous row.
 */
function levenshteinDistance(str1: string, str2: string): number {
  const width = str1.length + 1;
  let previousRow: number[] = Array.from({ length: width }, (_, i) => i);
  let currentRow: number[] = new Array<number>(width).fill(0);

  for (let j = 1; j <= str2.length; j += 1) {
    currentRow[0] = j;
    for (let i = 1; i <= str1.length; i += 1) {
      const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
      currentRow[i] = Math.min(
        currentRow[i - 1] + 1, // deletion
        previousRow[i] + 1, // insertion
        previousRow[i - 1] + indicator // substitution
      );
    }
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap the last computed row is in `previousRow`.
  return previousRow[str1.length];
}

/**
 * Select the best master candidate from a group of duplicates: the record
 * with the highest completeness score, the earliest one winning ties.
 *
 * @throws TypeError when `interests` is empty.
 */
function selectMasterCandidate(interests: InterestRecord[]): InterestRecord {
  if (interests.length === 0) {
    throw new TypeError('selectMasterCandidate requires at least one interest');
  }

  let best = interests[0];
  let bestScore = calculateCompletenessScore(best);

  for (const current of interests.slice(1)) {
    const currentScore = calculateCompletenessScore(current);
    if (currentScore > bestScore) {
      best = current;
      bestScore = currentScore;
    }
  }

  return best;
}

/**
 * Calculate completeness score for an interest record.
 *
 * The base score is the fraction of six tracked fields that are non-blank.
 * A record whose 'Created At' is under 30 days old gets +0.1, under 90 days
 * +0.05. An unparsable date gives no bonus.
 */
function calculateCompletenessScore(interest: InterestRecord): number {
  const fields = ['Full Name', 'Email Address', 'Phone Number', 'Address', 'Extra Comments', 'Berth Size Desired'];

  const filledFields = fields.filter((field) => {
    const value = interest[field];
    return Boolean(value) && String(value).trim().length > 0;
  });

  let score = filledFields.length / fields.length;

  // Bonus for recent creation
  const createdAt = interest['Created At'];
  if (createdAt) {
    const created = new Date(createdAt as string | number | Date);
    const daysOld = (Date.now() - created.getTime()) / (1000 * 60 * 60 * 24);

    // More recent records get a small bonus
    if (daysOld < 30) score += 0.1;
    else if (daysOld < 90) score += 0.05;
  }

  return score;
}