-
-
+
+
diff --git a/server/api/interests/duplicates/find.ts b/server/api/interests/duplicates/find.ts
new file mode 100644
index 0000000..51b6179
--- /dev/null
+++ b/server/api/interests/duplicates/find.ts
@@ -0,0 +1,325 @@
+import { requireSalesOrAdmin } from '~/server/utils/auth';
+import { getNocoDbConfiguration } from '~/server/utils/nocodb';
+import { logAuditEvent } from '~/server/utils/audit-logger';
+
+/** Similarity threshold used when the `threshold` query parameter is missing or unparsable. */
+const DEFAULT_DUPLICATE_THRESHOLD = 0.8;
+
+/** Look-back window (days) used when the `dateRange` query parameter is missing or unparsable. */
+const DEFAULT_DATE_RANGE_DAYS = 365;
+
+/** Hard cap on records pulled for one scan; the duplicate search compares records pairwise. */
+const MAX_INTERESTS_PER_SCAN = 5000;
+
+/**
+ * Read one numeric query parameter.
+ *
+ * @param raw      Value as found on the parsed query object (may be a string, an array of
+ *                 strings when the key is repeated, or absent).
+ * @param parse    Converter applied to the first value, e.g. `parseFloat`.
+ * @param fallback Returned when the value is absent, empty, or does not parse to a finite number.
+ */
+function readNumericQueryParam(
+  raw: unknown,
+  parse: (text: string) => number,
+  fallback: number
+): number {
+  const first = Array.isArray(raw) ? raw[0] : raw;
+  if (first === undefined || first === null || first === '') return fallback;
+
+  const parsed = parse(String(first));
+  // A NaN threshold would make every `score >= threshold` test false and silently
+  // report "no duplicates", so unparsable input falls back to the default instead.
+  return Number.isFinite(parsed) ? parsed : fallback;
+}
+
+/**
+ * Request handler that scans interest records for likely duplicates.
+ *
+ * Query parameters:
+ * - `threshold`: minimum similarity score for two records to be grouped (default 0.8).
+ * - `dateRange`: only records created within this many days are scanned (default 365);
+ *   a value of 0 or less disables the date filter.
+ *
+ * Returns `{ success: true, data }` with the duplicate groups and summary counts, or
+ * `{ success: false, error }` when access is denied or anything else fails. Failures are
+ * reported in the body rather than rethrown.
+ */
+export default defineEventHandler(async (event) => {
+  console.log('[INTERESTS] Find duplicates request');
+
+  try {
+    // Require sales or admin access for duplicate detection
+    await requireSalesOrAdmin(event);
+
+    const query = getQuery(event);
+    const threshold = readNumericQueryParam(query.threshold, parseFloat, DEFAULT_DUPLICATE_THRESHOLD);
+    const dateRange = readNumericQueryParam(
+      query.dateRange,
+      (text) => parseInt(text, 10),
+      DEFAULT_DATE_RANGE_DAYS
+    );
+
+    // Get all interests from NocoDB
+    const config = getNocoDbConfiguration();
+    const interestTableId = "mbs9hjauug4eseo"; // Interest table ID
+    const url = `${config.url}/api/v2/tables/${interestTableId}/records`;
+
+    // Every query-string value goes through `params` so encoding is handled in one place.
+    const filterParams: Record<string, string> = {};
+    if (dateRange > 0) {
+      const cutoffDate = new Date();
+      cutoffDate.setDate(cutoffDate.getDate() - dateRange);
+      // NOTE(review): assumes the table exposes a "Created At" column that accepts an
+      // ISO timestamp with `gte` - confirm against the NocoDB schema.
+      filterParams.where = `(Created At,gte,${cutoffDate.toISOString()})`;
+    }
+
+    // The server may return fewer rows than the requested `limit` (page-size caps are
+    // a deployment setting - confirm), so keep requesting pages while it explicitly
+    // reports that more remain. When no paging info is returned this degrades to the
+    // single request the handler used to make.
+    const interests: any[] = [];
+    while (interests.length < MAX_INTERESTS_PER_SCAN) {
+      const page = (await $fetch(url, {
+        headers: {
+          'xc-token': config.token
+        },
+        params: {
+          ...filterParams,
+          limit: MAX_INTERESTS_PER_SCAN - interests.length,
+          offset: interests.length
+        }
+      })) as any;
+
+      const rows: any[] = Array.isArray(page?.list) ? page.list : [];
+      interests.push(...rows);
+
+      if (rows.length === 0 || page?.pageInfo?.isLastPage !== false) break;
+    }
+
+    console.log('[INTERESTS] Analyzing', interests.length, 'interests for duplicates');
+
+    // Find potential duplicates
+    const duplicateGroups = findDuplicateInterests(interests, threshold);
+
+    console.log('[INTERESTS] Found', duplicateGroups.length, 'duplicate groups');
+
+    // Log the audit event
+    await logAuditEvent(event, 'FIND_INTEREST_DUPLICATES', 'interest', {
+      changes: {
+        totalInterests: interests.length,
+        duplicateGroups: duplicateGroups.length,
+        threshold,
+        dateRange
+      }
+    });
+
+    return {
+      success: true,
+      data: {
+        duplicateGroups,
+        totalInterests: interests.length,
+        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.interests.length, 0),
+        threshold,
+        dateRange
+      }
+    };
+
+  } catch (error: unknown) {
+    console.error('[INTERESTS] Failed to find duplicates:', error);
+
+    const statusCode =
+      typeof error === 'object' && error !== null
+        ? (error as { statusCode?: unknown }).statusCode
+        : undefined;
+
+    if (statusCode === 403) {
+      return {
+        success: false,
+        error: 'Insufficient permissions. Sales or admin access required.'
+      };
+    }
+
+    return {
+      success: false,
+      error: 'Failed to find duplicates'
+    };
+  }
+});
+
+/**
+ * Group interest records that look like duplicates of one another.
+ *
+ * Grouping is greedy and seed-based: each record not yet assigned to a group becomes a
+ * seed, and every later unassigned record whose similarity to that seed is at least
+ * `threshold` joins the seed's group. Records are compared to the seed only, not to
+ * each other, and a record belongs to at most one group.
+ *
+ * @param interests Records to scan; each is expected to carry an `Id` used to track
+ *                  which records have already been grouped.
+ * @param threshold Minimum similarity score (see `calculateSimilarity`) for a match.
+ * @returns One entry per group of two or more records, with the chosen master record,
+ *          a human-readable match reason, and the mean similarity between the master
+ *          and every other group member.
+ */
+function findDuplicateInterests(interests: any[], threshold: number = 0.8) {
+  const duplicateGroups: Array<{
+    id: string;
+    interests: any[];
+    matchReason: string;
+    confidence: number;
+    masterCandidate: any;
+  }> = [];
+
+  const processedIds = new Set<unknown>();
+
+  for (let i = 0; i < interests.length; i++) {
+    const seed = interests[i];
+
+    if (processedIds.has(seed.Id)) continue;
+
+    const matches = [seed];
+
+    for (let j = i + 1; j < interests.length; j++) {
+      const candidate = interests[j];
+
+      if (processedIds.has(candidate.Id)) continue;
+
+      if (calculateSimilarity(seed, candidate).score >= threshold) {
+        matches.push(candidate);
+        processedIds.add(candidate.Id);
+      }
+    }
+
+    if (matches.length < 2) continue;
+
+    // Matched candidates were marked above; only the seed is still unmarked.
+    processedIds.add(seed.Id);
+
+    // Determine the best master candidate (most complete record)
+    const masterCandidate = selectMasterCandidate(matches);
+
+    // Average the master's similarity to every OTHER member. Skipping by position
+    // (`slice(1)`) is wrong whenever the master is not the first match: it would
+    // score the master against itself and leave the first record out.
+    const others = matches.filter((match) => match !== masterCandidate);
+    const totalSimilarity = others.reduce(
+      (sum, match) => sum + calculateSimilarity(masterCandidate, match).score,
+      0
+    );
+    const avgConfidence = others.length > 0 ? totalSimilarity / others.length : 0;
+
+    duplicateGroups.push({
+      id: `group_${duplicateGroups.length + 1}`,
+      interests: matches,
+      matchReason: generateMatchReason(matches),
+      confidence: avgConfidence,
+      masterCandidate
+    });
+  }
+
+  return duplicateGroups;
+}
+
+/**
+ * Score how likely two interest records describe the same person.
+ *
+ * Up to four criteria are compared, each only when BOTH records have a usable value:
+ * - email (weight 0.4): exact match after normalisation
+ * - phone (weight 0.3): exact match on digits only
+ * - name (weight 0.2): edit-distance similarity
+ * - address (weight 0.1): edit-distance similarity
+ *
+ * A value that is empty after normalisation (e.g. a phone of "N/A", or whitespace only)
+ * is treated as missing; otherwise two such placeholders would count as an exact match.
+ * Values are coerced to strings first so non-string field values do not throw.
+ *
+ * @returns `score`: the weighted average over the criteria that were compared (0 when
+ *          none could be compared); `details`: the per-criterion scores and weights.
+ */
+function calculateSimilarity(interest1: any, interest2: any) {
+  const scores: Array<{ type: string; score: number; weight: number }> = [];
+
+  // Email similarity (highest weight)
+  if (interest1['Email Address'] && interest2['Email Address']) {
+    const email1 = normalizeEmail(String(interest1['Email Address']));
+    const email2 = normalizeEmail(String(interest2['Email Address']));
+    if (email1 !== '' && email2 !== '') {
+      scores.push({ type: 'email', score: email1 === email2 ? 1.0 : 0.0, weight: 0.4 });
+    }
+  }
+
+  // Phone similarity
+  if (interest1['Phone Number'] && interest2['Phone Number']) {
+    const phone1 = normalizePhone(String(interest1['Phone Number']));
+    const phone2 = normalizePhone(String(interest2['Phone Number']));
+    if (phone1 !== '' && phone2 !== '') {
+      scores.push({ type: 'phone', score: phone1 === phone2 ? 1.0 : 0.0, weight: 0.3 });
+    }
+  }
+
+  // Name similarity
+  if (interest1['Full Name'] && interest2['Full Name']) {
+    const name1 = String(interest1['Full Name']);
+    const name2 = String(interest2['Full Name']);
+    if (name1.trim() !== '' && name2.trim() !== '') {
+      scores.push({ type: 'name', score: calculateNameSimilarity(name1, name2), weight: 0.2 });
+    }
+  }
+
+  // Address similarity
+  if (interest1.Address && interest2.Address) {
+    const address1 = String(interest1.Address);
+    const address2 = String(interest2.Address);
+    if (address1.trim() !== '' && address2.trim() !== '') {
+      scores.push({
+        type: 'address',
+        score: calculateStringSimilarity(address1, address2),
+        weight: 0.1
+      });
+    }
+  }
+
+  // Weighted average over the criteria actually compared; `|| 1` avoids 0/0 when none were.
+  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
+  const weightedScore = scores.reduce((sum, s) => sum + (s.score * s.weight), 0) / (totalWeight || 1);
+
+  return {
+    score: weightedScore,
+    details: scores
+  };
+}
+
+/**
+ * Put an email address into a canonical form for equality checks:
+ * lower-cased, with surrounding whitespace removed.
+ */
+function normalizeEmail(email: string): string {
+  const lowerCased = email.toLowerCase();
+  const withoutPadding = lowerCased.trim();
+  return withoutPadding;
+}
+
+/**
+ * Reduce a phone number to its ASCII digits so that differently formatted
+ * numbers can be compared for equality.
+ */
+function normalizePhone(phone: string): string {
+  // Drop every character that is not 0-9 (spaces, dashes, brackets, "+", letters, ...).
+  const digitsOnly = phone.replace(/[^0-9]/g, '');
+  return digitsOnly;
+}
+
+/**
+ * Calculate name similarity using Levenshtein distance.
+ *
+ * Names are compared case-insensitively with surrounding whitespace ignored. The
+ * comparison is the same one used for any other free-text field, so this delegates to
+ * `calculateStringSimilarity` rather than keeping a second copy of the algorithm.
+ *
+ * @returns A value in [0, 1]: 1 for identical names, decreasing as more edits are
+ *          needed relative to the longer name.
+ */
+function calculateNameSimilarity(name1: string, name2: string): number {
+  return calculateStringSimilarity(name1, name2);
+}
+
+/**
+ * Compare two strings case-insensitively, ignoring surrounding whitespace.
+ *
+ * @returns 1 when the cleaned strings are identical; otherwise
+ *          `1 - editDistance / lengthOfLongerString`.
+ */
+function calculateStringSimilarity(str1: string, str2: string): number {
+  const left = str1.toLowerCase().trim();
+  const right = str2.toLowerCase().trim();
+
+  if (left === right) {
+    return 1.0;
+  }
+
+  const longest = Math.max(left.length, right.length);
+  if (longest <= 0) {
+    return 0;
+  }
+
+  const edits = levenshteinDistance(left, right);
+  return 1 - (edits / longest);
+}
+
+/**
+ * Levenshtein (edit) distance between two strings: the minimum number of
+ * single-character insertions, deletions, or substitutions that turn one into the
+ * other. Characters are compared by string index.
+ *
+ * Only the previous and current rows of the dynamic-programming table are kept.
+ */
+function levenshteinDistance(str1: string, str2: string): number {
+  const width = str1.length;
+
+  // Row 0: distance from the empty prefix of str2 to each prefix of str1.
+  let previousRow: number[] = Array.from({ length: width + 1 }, (_, col) => col);
+
+  for (let row = 1; row <= str2.length; row += 1) {
+    const targetChar = str2[row - 1];
+    // Column 0: distance from a str2 prefix of length `row` to the empty string.
+    const currentRow: number[] = [row];
+
+    for (let col = 1; col <= width; col += 1) {
+      const substitutionCost = str1[col - 1] === targetChar ? 0 : 1;
+      const viaDeletion = currentRow[col - 1] + 1;
+      const viaInsertion = previousRow[col] + 1;
+      const viaSubstitution = previousRow[col - 1] + substitutionCost;
+      currentRow.push(Math.min(viaDeletion, viaInsertion, viaSubstitution));
+    }
+
+    previousRow = currentRow;
+  }
+
+  return previousRow[width];
+}
+
+/**
+ * Select the best master candidate from a group of duplicates.
+ *
+ * The record with the highest completeness score wins; on a tie the record that
+ * appears earlier in `interests` is kept. Each record is scored exactly once.
+ *
+ * @param interests Records in one duplicate group.
+ * @returns The chosen record, or `undefined` when `interests` is empty.
+ */
+function selectMasterCandidate(interests: any[]) {
+  if (interests.length === 0) return undefined;
+
+  let best = interests[0];
+  let bestScore = calculateCompletenessScore(best);
+
+  for (let index = 1; index < interests.length; index += 1) {
+    const candidate = interests[index];
+    const candidateScore = calculateCompletenessScore(candidate);
+
+    // Strictly greater, so earlier records win ties.
+    if (candidateScore > bestScore) {
+      best = candidate;
+      bestScore = candidateScore;
+    }
+  }
+
+  return best;
+}
+
+/**
+ * Rate how complete an interest record is.
+ *
+ * The base score is the fraction of six tracked fields that hold a truthy value whose
+ * string form is non-blank. Records with a `Created At` value get a small recency
+ * bonus: +0.1 when under 30 days old, +0.05 when under 90 days old.
+ *
+ * @returns The base score plus any recency bonus (so the maximum is 1.1).
+ */
+function calculateCompletenessScore(interest: any): number {
+  const trackedFields = ['Full Name', 'Email Address', 'Phone Number', 'Address', 'Extra Comments', 'Berth Size Desired'];
+
+  let populated = 0;
+  for (const fieldName of trackedFields) {
+    const value = interest[fieldName];
+    if (value && value.toString().trim().length > 0) {
+      populated += 1;
+    }
+  }
+
+  const baseScore = populated / trackedFields.length;
+
+  const createdAt = interest['Created At'];
+  if (!createdAt) {
+    return baseScore;
+  }
+
+  // Age in days; an unparsable date yields NaN, which fails both comparisons below.
+  const millisPerDay = 1000 * 60 * 60 * 24;
+  const ageInDays = (new Date().getTime() - new Date(createdAt).getTime()) / millisPerDay;
+
+  if (ageInDays < 30) {
+    return baseScore + 0.1;
+  }
+  if (ageInDays < 90) {
+    return baseScore + 0.05;
+  }
+  return baseScore;
+}
+
+/**
+ * Generate a descriptive match reason for a group of duplicate records.
+ *
+ * Each criterion is judged only over the records that have a usable value for it, and
+ * needs at least two such records. Values that are empty after normalisation (a phone
+ * with no digits, whitespace-only text) are ignored, so placeholders such as "N/A"
+ * are never reported as a shared phone number. Values are coerced to strings first so
+ * non-string field values do not throw.
+ *
+ * @returns A comma-separated list of reasons, or 'Multiple matching criteria' when no
+ *          individual reason applies.
+ */
+function generateMatchReason(interests: any[]): string {
+  const reasons: string[] = [];
+
+  // Check for exact email matches
+  const emails = interests
+    .map((i) => i['Email Address'])
+    .filter(Boolean)
+    .map((e) => normalizeEmail(String(e)))
+    .filter((e) => e !== '');
+  if (emails.length > 1 && new Set(emails).size === 1) {
+    reasons.push('Same email address');
+  }
+
+  // Check for exact phone matches
+  const phones = interests
+    .map((i) => i['Phone Number'])
+    .filter(Boolean)
+    .map((p) => normalizePhone(String(p)))
+    .filter((p) => p !== '');
+  if (phones.length > 1 && new Set(phones).size === 1) {
+    reasons.push('Same phone number');
+  }
+
+  // Check for similar names
+  const names = interests
+    .map((i) => i['Full Name'])
+    .filter(Boolean)
+    .map((n) => String(n).toLowerCase().trim())
+    .filter((n) => n !== '');
+  if (names.length > 1) {
+    // NOTE(review): 'Similar names' is reported whenever the names are not all
+    // identical; how close they actually are is not measured here.
+    reasons.push(new Set(names).size === 1 ? 'Same name' : 'Similar names');
+  }
+
+  return reasons.length > 0 ? reasons.join(', ') : 'Multiple matching criteria';
+}