diff --git a/server/api/admin/duplicates/find.ts b/server/api/admin/duplicates/find.ts index c1d51cc..adc3b8a 100644 --- a/server/api/admin/duplicates/find.ts +++ b/server/api/admin/duplicates/find.ts @@ -1,8 +1,9 @@ import { requireAuth, requireSalesOrAdmin } from '~/server/utils/auth'; import { getNocoDbConfiguration } from '~/server/utils/nocodb'; +import { findDuplicates, createInterestConfig } from '~/server/utils/duplicate-detection'; export default defineEventHandler(async (event) => { - console.log('[DUPLICATES] Find duplicates request'); + console.log('[ADMIN] Find duplicates request'); try { // Require sales or admin access for duplicate detection @@ -26,17 +27,27 @@ export default defineEventHandler(async (event) => { const interests = response.list || []; console.log('[ADMIN] Analyzing', interests.length, 'interests for duplicates'); - // Find potential duplicates - const duplicateGroups = findDuplicateInterests(interests, threshold); + // Find duplicate groups using the new centralized utility + const duplicateConfig = createInterestConfig(); + const duplicateGroups = findDuplicates(interests, duplicateConfig); + + // Convert to the expected format + const formattedGroups = duplicateGroups.map(group => ({ + id: group.id, + interests: group.items, + matchReason: group.matchReason, + confidence: group.confidence, + masterCandidate: group.masterCandidate + })); - console.log('[ADMIN] Found', duplicateGroups.length, 'duplicate groups'); + console.log('[ADMIN] Found', formattedGroups.length, 'duplicate groups'); return { success: true, data: { - duplicateGroups, + duplicateGroups: formattedGroups, totalInterests: interests.length, - duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.interests.length, 0), + duplicateCount: formattedGroups.reduce((sum, group) => sum + group.interests.length, 0), threshold } }; @@ -57,203 +68,3 @@ export default defineEventHandler(async (event) => { }; } }); - -/** - * Find duplicate interests based on multiple criteria - */ -function findDuplicateInterests(interests: any[], threshold: number = 0.8) { - const duplicateGroups: Array<{ - id: string; - interests: any[]; - matchReason: string; - confidence: number; - masterCandidate: any; - }> = []; - - const processedIds = new Set(); - - for (let i = 0; i < interests.length; i++) { - const interest1 = interests[i]; - - if (processedIds.has(interest1.Id)) continue; - - const matches = [interest1]; - - for (let j = i + 1; j < interests.length; j++) { - const interest2 = interests[j]; - - if (processedIds.has(interest2.Id)) continue; - - const similarity = calculateSimilarity(interest1, interest2); - - if (similarity.score >= threshold) { - matches.push(interest2); - processedIds.add(interest2.Id); - } - } - - if (matches.length > 1) { - // Mark all as processed - matches.forEach(match => processedIds.add(match.Id)); - - // Determine the best master candidate (most complete record) - const masterCandidate = selectMasterCandidate(matches); - - duplicateGroups.push({ - id: `group_${duplicateGroups.length + 1}`, - interests: matches, - matchReason: 'Multiple matching criteria', - confidence: Math.max(...matches.slice(1).map(match => - calculateSimilarity(masterCandidate, match).score - )), - masterCandidate - }); - } - } - - return duplicateGroups; -} - -/** - * Calculate similarity between two interests - */ -function calculateSimilarity(interest1: any, interest2: any) { - const scores: Array<{ type: string; score: number; weight: number }> = []; - - // Email similarity (highest weight) - if (interest1['Email Address'] && interest2['Email Address']) { - const emailScore = interest1['Email Address'].toLowerCase() === interest2['Email Address'].toLowerCase() ? 1.0 : 0.0; - scores.push({ type: 'email', score: emailScore, weight: 0.4 }); - } - - // Phone similarity - if (interest1['Phone Number'] && interest2['Phone Number']) { - const phone1 = normalizePhone(interest1['Phone Number']); - const phone2 = normalizePhone(interest2['Phone Number']); - const phoneScore = phone1 === phone2 ? 1.0 : 0.0; - scores.push({ type: 'phone', score: phoneScore, weight: 0.3 }); - } - - // Name similarity - if (interest1['Full Name'] && interest2['Full Name']) { - const nameScore = calculateNameSimilarity(interest1['Full Name'], interest2['Full Name']); - scores.push({ type: 'name', score: nameScore, weight: 0.2 }); - } - - // Address similarity - if (interest1.Address && interest2.Address) { - const addressScore = calculateStringSimilarity(interest1.Address, interest2.Address); - scores.push({ type: 'address', score: addressScore, weight: 0.1 }); - } - - // Calculate weighted average - const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0); - const weightedScore = scores.reduce((sum, s) => sum + (s.score * s.weight), 0) / (totalWeight || 1); - - return { - score: weightedScore, - details: scores - }; -} - -/** - * Normalize phone number for comparison - */ -function normalizePhone(phone: string): string { - return phone.replace(/\D/g, ''); // Remove all non-digits -} - -/** - * Calculate name similarity using Levenshtein distance - */ -function calculateNameSimilarity(name1: string, name2: string): number { - const str1 = name1.toLowerCase().trim(); - const str2 = name2.toLowerCase().trim(); - - if (str1 === str2) return 1.0; - - const distance = levenshteinDistance(str1, str2); - const maxLength = Math.max(str1.length, str2.length); - - return maxLength > 0 ? 1 - (distance / maxLength) : 0; -} - -/** - * Calculate string similarity using Levenshtein distance - */ -function calculateStringSimilarity(str1: string, str2: string): number { - const s1 = str1.toLowerCase().trim(); - const s2 = str2.toLowerCase().trim(); - - if (s1 === s2) return 1.0; - - const distance = levenshteinDistance(s1, s2); - const maxLength = Math.max(s1.length, s2.length); - - return maxLength > 0 ? 1 - (distance / maxLength) : 0; -} - -/** - * Calculate Levenshtein distance between two strings - */ -function levenshteinDistance(str1: string, str2: string): number { - const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null)); - - for (let i = 0; i <= str1.length; i += 1) { - matrix[0][i] = i; - } - - for (let j = 0; j <= str2.length; j += 1) { - matrix[j][0] = j; - } - - for (let j = 1; j <= str2.length; j += 1) { - for (let i = 1; i <= str1.length; i += 1) { - const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1; - matrix[j][i] = Math.min( - matrix[j][i - 1] + 1, // deletion - matrix[j - 1][i] + 1, // insertion - matrix[j - 1][i - 1] + indicator // substitution - ); - } - } - - return matrix[str2.length][str1.length]; -} - -/** - * Select the best master candidate from a group of duplicates - */ -function selectMasterCandidate(interests: any[]) { - return interests.reduce((best, current) => { - const bestScore = calculateCompletenessScore(best); - const currentScore = calculateCompletenessScore(current); - - return currentScore > bestScore ? current : best; - }); -} - -/** - * Calculate completeness score for an interest record - */ -function calculateCompletenessScore(interest: any): number { - const fields = ['Full Name', 'Email Address', 'Phone Number', 'Address', 'Extra Comments', 'Berth Size Desired']; - const filledFields = fields.filter(field => - interest[field] && interest[field].toString().trim().length > 0 - ); - - let score = filledFields.length / fields.length; - - // Bonus for recent creation - if (interest['Created At']) { - const created = new Date(interest['Created At']); - const now = new Date(); - const daysOld = (now.getTime() - created.getTime()) / (1000 * 60 * 60 * 24); - - // More recent records get a small bonus - if (daysOld < 30) score += 0.1; - else if (daysOld < 90) score += 0.05; - } - - return score; -} diff --git a/server/utils/duplicate-detection.ts b/server/utils/duplicate-detection.ts index bf9ff41..efb3cd4 100644 --- a/server/utils/duplicate-detection.ts +++ b/server/utils/duplicate-detection.ts @@ -342,14 +342,23 @@ export function createInterestConfig(): DuplicateDetectionConfig { return { type: 'interest', - // Group by normalized email domain or phone prefix for blocking + // Group by normalized name prefix for blocking to catch name-based duplicates getKey: (interest) => { + // Priority 1: Use normalized name prefix (first 3 chars) to catch name duplicates + if (interest['Full Name']) { + const name = interest['Full Name'].toLowerCase().trim(); + const prefix = name.substring(0, 3); + return `name_${prefix}`; + } + + // Priority 2: Use email domain for email-based grouping if (interest['Email Address']) { const email = normalizeEmail(interest['Email Address']); const domain = email.split('@')[1] || 'unknown'; return `email_${domain}`; } + // Priority 3: Use phone prefix if (interest['Phone Number']) { const phone = normalizePhone(interest['Phone Number']); const prefix = phone.length >= 4 ? phone.substring(0, 4) : phone;