260 lines
7.5 KiB
TypeScript
260 lines
7.5 KiB
TypeScript
import { requireAuth } from '~/server/utils/auth';
|
|
import { getNocoDbConfiguration } from '~/server/utils/nocodb';
|
|
|
|
/**
 * API route that scans interest records for likely duplicates.
 *
 * Flow: authenticate the caller, read the optional `threshold` query
 * parameter, request up to 5000 interest records from NocoDB in a single
 * call (no pagination is performed here, so anything beyond that first page
 * is not analysed), and group similar records via `findDuplicateInterests`.
 *
 * Failures are reported in the response body as `{ success: false, error }`
 * rather than by throwing.
 *
 * @returns `{ success: true, data }` with the duplicate groups, the number of
 *   records analysed, the number of records that belong to a group, and the
 *   threshold that was actually applied; or `{ success: false, error }`.
 */
export default defineEventHandler(async (event) => {
  console.log('[DUPLICATES] Find duplicates request');

  try {
    // Require authentication (any authenticated user with interest access)
    await requireAuth(event);

    const DEFAULT_THRESHOLD = 0.8;
    const query = getQuery(event);
    const parsedThreshold = query.threshold
      ? parseFloat(String(query.threshold))
      : DEFAULT_THRESHOLD;
    // parseFloat yields NaN for non-numeric input. Every `score >= NaN`
    // comparison is false, which would silently report "no duplicates", so
    // fall back to the default instead.
    const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : DEFAULT_THRESHOLD;

    // Get all interests from NocoDB
    const config = getNocoDbConfiguration();
    const interestTableId = "mbs9hjauug4eseo"; // Interest table ID from nocodb.ts
    const response = await $fetch(`${config.url}/api/v2/tables/${interestTableId}/records`, {
      headers: {
        'xc-token': config.token
      },
      params: {
        limit: 5000 // Get a large batch for duplicate detection
      }
    }) as any;

    const interests = response.list || [];
    console.log('[ADMIN] Analyzing', interests.length, 'interests for duplicates');

    // Find potential duplicates
    const duplicateGroups = findDuplicateInterests(interests, threshold);

    console.log('[ADMIN] Found', duplicateGroups.length, 'duplicate groups');

    return {
      success: true,
      data: {
        duplicateGroups,
        totalInterests: interests.length,
        // Total number of records that are members of some duplicate group.
        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.interests.length, 0),
        threshold
      }
    };
  } catch (error: any) {
    console.error('[ADMIN] Failed to find duplicates:', error);

    if (error.statusCode === 403) {
      return {
        success: false,
        error: 'Insufficient permissions. Admin access required.'
      };
    }

    return {
      success: false,
      error: 'Failed to find duplicates'
    };
  }
});
|
|
|
|
/**
 * Group interest records that look like duplicates of one another.
 *
 * Grouping is greedy: each record that has not yet been assigned to a group
 * becomes a seed, and every later unassigned record whose similarity to the
 * seed is at least `threshold` joins the seed's group. Members are compared
 * against the seed only, not against each other.
 *
 * @param interests Records to analyse; each is expected to carry an `Id`.
 * @param threshold Minimum similarity score for two records to be grouped.
 * @returns One entry per group of two or more records, including the record
 *   chosen as master and a confidence value (the highest similarity between
 *   the master and any other member of the group).
 */
function findDuplicateInterests(interests: any[], threshold: number = 0.8) {
  const duplicateGroups: Array<{
    id: string;
    interests: any[];
    matchReason: string;
    confidence: number;
    masterCandidate: any;
  }> = [];

  const processedIds = new Set<number>();

  for (let i = 0; i < interests.length; i++) {
    const interest1 = interests[i];

    if (processedIds.has(interest1.Id)) continue;

    const matches = [interest1];

    for (let j = i + 1; j < interests.length; j++) {
      const interest2 = interests[j];

      if (processedIds.has(interest2.Id)) continue;

      const similarity = calculateSimilarity(interest1, interest2);

      if (similarity.score >= threshold) {
        matches.push(interest2);
        processedIds.add(interest2.Id);
      }
    }

    if (matches.length > 1) {
      // Mark all as processed
      matches.forEach(match => processedIds.add(match.Id));

      // Determine the best master candidate (most complete record)
      const masterCandidate = selectMasterCandidate(matches);

      // Compare the master against every OTHER member. The master is not
      // necessarily matches[0], so it is excluded by identity; comparing it
      // with itself would otherwise inflate the confidence to 1.0.
      const otherMembers = matches.filter(match => match !== masterCandidate);
      const confidence = otherMembers.reduce(
        (highest, match) => Math.max(highest, calculateSimilarity(masterCandidate, match).score),
        0
      );

      duplicateGroups.push({
        id: `group_${duplicateGroups.length + 1}`,
        interests: matches,
        matchReason: 'Multiple matching criteria',
        confidence,
        masterCandidate
      });
    }
  }

  return duplicateGroups;
}
|
|
|
|
/**
 * Calculate similarity between two interests.
 *
 * Each of Email, Phone, Name and Address contributes a score in [0, 1] only
 * when both records have a usable value for it. The result is the weighted
 * average over the fields that were compared (weights: email 0.4, phone 0.3,
 * name 0.2, address 0.1), so missing fields do not lower the score. When no
 * field could be compared the score is 0.
 *
 * @returns `score` (weighted average) and `details` (per-field breakdown).
 */
function calculateSimilarity(interest1: any, interest2: any) {
  const scores: Array<{ type: string; score: number; weight: number }> = [];

  // Email similarity (highest weight)
  if (interest1.Email && interest2.Email) {
    const email1 = interest1.Email.toLowerCase().trim();
    const email2 = interest2.Email.toLowerCase().trim();
    // Whitespace-only values carry no information; skip them instead of
    // letting '' === '' count as a perfect match.
    if (email1 && email2) {
      const emailScore = email1 === email2 ? 1.0 : 0.0;
      scores.push({ type: 'email', score: emailScore, weight: 0.4 });
    }
  }

  // Phone similarity
  if (interest1.Phone && interest2.Phone) {
    const phone1 = normalizePhone(interest1.Phone);
    const phone2 = normalizePhone(interest2.Phone);
    // Values without any digits (e.g. "N/A") normalise to ''; two such
    // placeholders must not be treated as the same phone number.
    if (phone1 && phone2) {
      const phoneScore = phone1 === phone2 ? 1.0 : 0.0;
      scores.push({ type: 'phone', score: phoneScore, weight: 0.3 });
    }
  }

  // Name similarity
  if (interest1.Name && interest2.Name) {
    const nameScore = calculateNameSimilarity(interest1.Name, interest2.Name);
    scores.push({ type: 'name', score: nameScore, weight: 0.2 });
  }

  // Address similarity
  if (interest1.Address && interest2.Address) {
    const addressScore = calculateStringSimilarity(interest1.Address, interest2.Address);
    scores.push({ type: 'address', score: addressScore, weight: 0.1 });
  }

  // Calculate weighted average; `|| 1` avoids dividing by zero when nothing
  // was comparable (the numerator is 0 in that case as well).
  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
  const weightedScore = scores.reduce((sum, s) => sum + (s.score * s.weight), 0) / (totalWeight || 1);

  return {
    score: weightedScore,
    details: scores
  };
}
|
|
|
|
/**
 * Reduce a phone number to its ASCII digits so that differently formatted
 * numbers can be compared.
 *
 * @param phone Raw phone string, possibly containing spaces or punctuation.
 * @returns The digits 0-9 of `phone` in their original order; an empty
 *   string when there are none.
 */
function normalizePhone(phone: string): string {
  const digits = phone.match(/\d/g);
  return digits === null ? '' : digits.join('');
}
|
|
|
|
/**
 * Score how alike two names are, ignoring case and surrounding whitespace.
 *
 * @returns 1.0 when the normalised names are identical; otherwise
 *   `1 - distance / longerLength`, where `distance` is the value returned by
 *   `levenshteinDistance` for the normalised names.
 */
function calculateNameSimilarity(name1: string, name2: string): number {
  const [left, right] = [name1, name2].map(name => name.toLowerCase().trim());

  if (left === right) {
    return 1.0;
  }

  const editDistance = levenshteinDistance(left, right);
  const longerLength = Math.max(left.length, right.length);

  if (longerLength > 0) {
    return 1 - (editDistance / longerLength);
  }
  return 0;
}
|
|
|
|
/**
 * Score how alike two free-text strings are, ignoring case and surrounding
 * whitespace.
 *
 * @returns 1.0 when the normalised strings are identical; otherwise
 *   `1 - distance / longerLength`, where `distance` is the value returned by
 *   `levenshteinDistance` for the normalised strings.
 */
function calculateStringSimilarity(str1: string, str2: string): number {
  const normalize = (value: string): string => value.toLowerCase().trim();
  const a = normalize(str1);
  const b = normalize(str2);

  if (a === b) return 1.0;

  const editDistance = levenshteinDistance(a, b);
  const longerLength = Math.max(a.length, b.length);

  return longerLength > 0 ? 1 - (editDistance / longerLength) : 0;
}
|
|
|
|
/**
 * Calculate the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other. Characters are compared per UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept, so memory use is
 * proportional to `str1.length` instead of `str1.length * str2.length`.
 *
 * @returns The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(str1: string, str2: string): number {
  // Trivial cases need no table at all.
  if (str1 === str2) return 0;
  if (str1.length === 0) return str2.length;
  if (str2.length === 0) return str1.length;

  const width = str1.length + 1;

  // previousRow[i]: distance between the first i chars of str1 and the first
  // (j - 1) chars of str2. Row 0 is the cost of deleting i characters.
  let previousRow: number[] = Array.from({ length: width }, (_, i) => i);
  let currentRow: number[] = new Array<number>(width).fill(0);

  for (let j = 1; j <= str2.length; j += 1) {
    // Column 0 is the cost of inserting the first j characters of str2.
    currentRow[0] = j;

    for (let i = 1; i <= str1.length; i += 1) {
      const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      currentRow[i] = Math.min(
        currentRow[i - 1] + 1, // deletion
        previousRow[i] + 1, // insertion
        previousRow[i - 1] + substitutionCost // substitution
      );
    }

    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap the last computed row is in previousRow.
  return previousRow[str1.length];
}
|
|
|
|
/**
 * Select the best master candidate from a group of duplicates: the record
 * with the highest score from `calculateCompletenessScore`. On a tie the
 * record that appears first in `interests` wins.
 *
 * @param interests Records belonging to one duplicate group.
 * @returns The chosen record, or `undefined` when `interests` is empty.
 */
function selectMasterCandidate(interests: any[]) {
  if (interests.length === 0) return undefined;

  let best = interests[0];
  // Keep the leader's score so it is not recomputed for every comparison.
  let bestScore = calculateCompletenessScore(best);

  for (let i = 1; i < interests.length; i++) {
    const candidate = interests[i];
    const candidateScore = calculateCompletenessScore(candidate);

    // Strictly greater: earlier records keep the lead on equal scores.
    if (candidateScore > bestScore) {
      best = candidate;
      bestScore = candidateScore;
    }
  }

  return best;
}
|
|
|
|
/**
 * Rate how complete an interest record is.
 *
 * The base score is the fraction of the six tracked fields (Name, Email,
 * Phone, Address, Comments, BerthRequirements) holding a truthy value whose
 * string form is non-empty after trimming. Records with a `CreatedAt` less
 * than 30 days old get +0.1, and those less than 90 days old get +0.05, so
 * the result can exceed 1.
 *
 * @param interest Record to rate.
 * @returns The completeness score, including any recency bonus.
 */
function calculateCompletenessScore(interest: any): number {
  const trackedFields = ['Name', 'Email', 'Phone', 'Address', 'Comments', 'BerthRequirements'];

  let populatedCount = 0;
  for (const fieldName of trackedFields) {
    const value = interest[fieldName];
    if (!value) continue;
    if (value.toString().trim().length > 0) {
      populatedCount += 1;
    }
  }

  const baseScore = populatedCount / trackedFields.length;

  // No creation timestamp means no recency bonus.
  if (!interest.CreatedAt) {
    return baseScore;
  }

  const millisecondsPerDay = 1000 * 60 * 60 * 24;
  const createdAtMs = new Date(interest.CreatedAt).getTime();
  const ageInDays = (Date.now() - createdAtMs) / millisecondsPerDay;

  // An unparseable date gives NaN, which fails both comparisons below.
  if (ageInDays < 30) {
    return baseScore + 0.1;
  }
  if (ageInDays < 90) {
    return baseScore + 0.05;
  }
  return baseScore;
}
|