import { requireSalesOrAdmin } from '~/server/utils/auth';
import { getNocoDbConfiguration, normalizePersonName } from '~/server/utils/nocodb';
import type { Expense } from '~/utils/types';

/** Look-back window (days) used when the `dateRange` query parameter is missing or unusable. */
const DEFAULT_DATE_RANGE_DAYS = 30;

/** Upper bound for the look-back window; keeps the computed start date a valid `Date`. */
const MAX_DATE_RANGE_DAYS = 36500;

/** Minimum weighted similarity score for two expenses to be placed in the same duplicate group. */
const DUPLICATE_SCORE_THRESHOLD = 0.8;

/** A set of expenses considered duplicates of each other. */
interface DuplicateGroup {
  id: string;
  expenses: any[];
  matchReason: string;
  confidence: number;
  masterCandidate: any;
}

/** Raw payer spellings and expense count collected under one normalized payer name. */
interface PayerBucket {
  spellings: Set<string>;
  expenseCount: number;
}

/**
 * Request handler: fetches expenses whose `Time` falls inside the requested
 * look-back window and reports duplicate groups and payer-name variations.
 *
 * Query parameters:
 * - `dateRange`: number of days to look back (defaults to 30).
 *
 * Responds with `{ success, data }` where `data` holds `duplicateGroups`,
 * `payerVariations`, `totalExpenses`, `duplicateCount` and the `dateRange` used.
 *
 * @throws 403 when the failure carries `statusCode === 403`; 500 for anything else.
 */
export default defineEventHandler(async (event) => {
  console.log('[EXPENSES] Find duplicate expenses request');

  try {
    // Require sales or admin access
    await requireSalesOrAdmin(event);

    const query = getQuery(event);
    const days = parseDateRangeDays(query.dateRange);

    // Calculate date range. The day arithmetic is done in local time while the
    // formatted dates below are UTC (toISOString).
    const endDate = new Date();
    const startDate = new Date();
    startDate.setDate(startDate.getDate() - days);
    const start = toDateOnly(startDate);
    const end = toDateOnly(endDate);

    // Get expenses from NocoDB
    const config = getNocoDbConfiguration();
    const expenseTableId = "mxfcefkk4dqs6uq";

    // NOTE(review): a single request with limit 5000 is issued and no further
    // pages are fetched — confirm the NocoDB instance allows a page this large.
    // NOTE(review): the upper bound is a date without a time component — confirm
    // NocoDB still includes expenses recorded later on the end date.
    const response = await $fetch(`${config.url}/api/v2/tables/${expenseTableId}/records`, {
      headers: {
        'xc-token': config.token
      },
      params: {
        limit: 5000,
        where: `(Time,gte,${start})~and(Time,lte,${end})`,
        sort: '-Time'
      }
    }) as any;

    // Treat a missing or malformed `list` as "no expenses" instead of crashing later.
    const expenses: any[] = Array.isArray(response?.list) ? response.list : [];
    console.log('[EXPENSES] Analyzing', expenses.length, 'expenses for duplicates');

    // Find duplicate groups
    const duplicateGroups = findDuplicateExpenses(expenses);

    // Also find payer name variations
    const payerVariations = findPayerNameVariations(expenses);

    console.log('[EXPENSES] Found', duplicateGroups.length, 'duplicate groups and', payerVariations.length, 'payer variations');

    return {
      success: true,
      data: {
        duplicateGroups,
        payerVariations,
        totalExpenses: expenses.length,
        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.expenses.length, 0),
        dateRange: {
          start,
          end
        }
      }
    };
  } catch (error: unknown) {
    console.error('[EXPENSES] Failed to find duplicates:', error);

    // Read statusCode defensively: the thrown value is not guaranteed to be an object.
    const statusCode = (error as { statusCode?: unknown } | null | undefined)?.statusCode;
    if (statusCode === 403) {
      throw createError({
        statusCode: 403,
        statusMessage: 'Access denied. Sales or admin role required.'
      });
    }

    throw createError({
      statusCode: 500,
      statusMessage: 'Failed to find duplicate expenses'
    });
  }
});

/**
 * Parse the `dateRange` query value into a number of days.
 *
 * Accepts a single value or an array (first element is used). Anything that does
 * not parse to a non-negative integer falls back to DEFAULT_DATE_RANGE_DAYS, and
 * the result is capped at MAX_DATE_RANGE_DAYS so date arithmetic cannot overflow
 * into an invalid Date.
 */
function parseDateRangeDays(raw: unknown): number {
  const first = Array.isArray(raw) ? raw[0] : raw;
  const days = Number.parseInt(String(first ?? ''), 10);
  if (!Number.isFinite(days) || days < 0) {
    return DEFAULT_DATE_RANGE_DAYS;
  }
  return Math.min(days, MAX_DATE_RANGE_DAYS);
}

/** Format a date as `YYYY-MM-DD` using its UTC ISO representation. */
function toDateOnly(date: Date): string {
  return date.toISOString().slice(0, 10);
}

/**
 * Find duplicate expenses based on multiple criteria.
 *
 * Each not-yet-grouped expense is compared against every later one; those whose
 * similarity score reaches DUPLICATE_SCORE_THRESHOLD join its group. An expense
 * belongs to at most one group.
 *
 * @param expenses Expense records; the fields read here are `Id` plus whatever
 *   calculateExpenseSimilarity and selectMasterExpense read.
 * @returns One entry per group with its members, the joined match reasons, the
 *   chosen master candidate and a confidence value.
 */
function findDuplicateExpenses(expenses: any[]) {
  const duplicateGroups: DuplicateGroup[] = [];
  const processedIds = new Set<unknown>();

  for (let i = 0; i < expenses.length; i++) {
    const expense1 = expenses[i];

    if (processedIds.has(expense1.Id)) continue;

    const matches = [expense1];
    const matchReasons = new Set<string>();

    for (let j = i + 1; j < expenses.length; j++) {
      const expense2 = expenses[j];

      if (processedIds.has(expense2.Id)) continue;

      const similarity = calculateExpenseSimilarity(expense1, expense2);

      if (similarity.score >= DUPLICATE_SCORE_THRESHOLD) {
        matches.push(expense2);
        processedIds.add(expense2.Id);
        similarity.reasons.forEach(r => matchReasons.add(r));
      }
    }

    if (matches.length > 1) {
      // Mark all as processed
      matches.forEach(match => processedIds.add(match.Id));

      // Determine the best master candidate
      const masterCandidate = selectMasterExpense(matches);

      // Confidence is the best similarity between the master and any OTHER member.
      // The master itself is excluded: comparing it with itself always scores as a
      // match and would inflate the confidence whenever the master is not the
      // first element of the group.
      const otherMembers = matches.filter(match => match !== masterCandidate);

      duplicateGroups.push({
        id: `group_${duplicateGroups.length + 1}`,
        expenses: matches,
        matchReason: Array.from(matchReasons).join(', '),
        confidence: Math.max(...otherMembers.map(match =>
          calculateExpenseSimilarity(masterCandidate, match).score
        )),
        masterCandidate
      });
    }
  }

  return duplicateGroups;
}

/**
 * Find payer name variations (like "Abbie" vs "abbie").
 *
 * Payers are grouped by `normalizePersonName(Payer)`; a group is reported only
 * when it contains more than one distinct raw spelling. Expenses without a
 * `Payer` are ignored.
 *
 * @returns Variations sorted by descending number of expenses in the group.
 */
function findPayerNameVariations(expenses: any[]) {
  const payerMap = new Map<string, PayerBucket>();

  // Group payers by normalized name, counting expenses in the same pass so the
  // list does not have to be re-scanned (and re-normalized) per variation.
  expenses.forEach(expense => {
    if (!expense.Payer) return;

    const normalized = normalizePersonName(expense.Payer);
    let bucket = payerMap.get(normalized);
    if (!bucket) {
      bucket = { spellings: new Set<string>(), expenseCount: 0 };
      payerMap.set(normalized, bucket);
    }
    bucket.spellings.add(expense.Payer);
    bucket.expenseCount += 1;
  });

  // Find variations
  const variations: Array<{
    normalizedName: string;
    variations: string[];
    expenseCount: number;
  }> = [];

  payerMap.forEach((bucket, normalized) => {
    if (bucket.spellings.size > 1) {
      variations.push({
        normalizedName: normalized,
        variations: Array.from(bucket.spellings),
        expenseCount: bucket.expenseCount
      });
    }
  });

  return variations.sort((a, b) => b.expenseCount - a.expenseCount);
}

/**
 * Calculate similarity between two expenses.
 *
 * Up to four independent checks contribute a (score, weight) pair; the result is
 * the weighted average of the checks that fired, or 0 when none did.
 *
 * @returns `score` (weighted average), `reasons` (one label per check that fired)
 *   and `details` (the raw score/weight entries).
 */
function calculateExpenseSimilarity(expense1: any, expense2: any) {
  const scores: Array<{ type: string; score: number; weight: number }> = [];
  const reasons: string[] = [];

  const name1 = expense1['Establishment Name'];
  const name2 = expense2['Establishment Name'];
  const sameEstablishment = name1 === name2;
  const samePrice = expense1.Price === expense2.Price;
  const samePayer = normalizePersonName(expense1.Payer) === normalizePersonName(expense2.Payer);

  // Exact match on establishment, price, and date (highest weight for true duplicates)
  if (sameEstablishment && samePrice && expense1.Time === expense2.Time) {
    scores.push({ type: 'exact', score: 1.0, weight: 0.5 });
    reasons.push('Exact match');
  }

  // Same payer, establishment, and price on same day (likely duplicate)
  const date1 = expense1.Time?.split('T')[0];
  const date2 = expense2.Time?.split('T')[0];

  if (samePayer && sameEstablishment && samePrice && date1 === date2) {
    scores.push({ type: 'same-day', score: 0.95, weight: 0.4 });
    reasons.push('Same person, place, amount on same day');
  }

  // Similar establishment names with same price and payer
  if (name1 && name2) {
    const nameSimilarity = calculateStringSimilarity(name1, name2);

    if (nameSimilarity > 0.8 && samePrice && samePayer) {
      scores.push({ type: 'similar', score: nameSimilarity, weight: 0.3 });
      reasons.push('Similar establishment name');
    }
  }

  // Time proximity check (within 5 minutes)
  if (expense1.Time && expense2.Time) {
    const time1 = new Date(expense1.Time).getTime();
    const time2 = new Date(expense2.Time).getTime();
    // An unparseable Time yields NaN, which fails the comparison below.
    const timeDiff = Math.abs(time1 - time2);

    if (timeDiff < 5 * 60 * 1000 && sameEstablishment) {
      scores.push({ type: 'time-proximity', score: 0.9, weight: 0.2 });
      reasons.push('Within 5 minutes at same establishment');
    }
  }

  // Calculate weighted average
  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
  const weightedScore = totalWeight > 0
    ? scores.reduce((sum, s) => sum + (s.score * s.weight), 0) / totalWeight
    : 0;

  return {
    score: weightedScore,
    reasons,
    details: scores
  };
}

/**
 * Calculate string similarity using Levenshtein distance.
 *
 * Both inputs are lower-cased and trimmed first.
 *
 * @returns 1.0 for equal strings, otherwise `1 - distance / longerLength`.
 */
function calculateStringSimilarity(str1: string, str2: string): number {
  const s1 = str1.toLowerCase().trim();
  const s2 = str2.toLowerCase().trim();

  if (s1 === s2) return 1.0;

  const distance = levenshteinDistance(s1, s2);
  const maxLength = Math.max(s1.length, s2.length);

  return maxLength > 0 ? 1 - (distance / maxLength) : 0;
}

/**
 * Calculate Levenshtein distance between two strings.
 *
 * Only the previous and current rows of the edit-distance table are kept.
 *
 * @returns The minimum number of single-character insertions, deletions and
 *   substitutions needed to turn one string into the other.
 */
function levenshteinDistance(str1: string, str2: string): number {
  // Row 0: distance from the empty prefix of str2 to each prefix of str1.
  let previousRow: number[] = Array.from({ length: str1.length + 1 }, (_, i) => i);

  for (let j = 1; j <= str2.length; j += 1) {
    const currentRow: number[] = [j];

    for (let i = 1; i <= str1.length; i += 1) {
      const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
      currentRow[i] = Math.min(
        currentRow[i - 1] + 1, // deletion
        previousRow[i] + 1, // insertion
        previousRow[i - 1] + indicator // substitution
      );
    }

    previousRow = currentRow;
  }

  return previousRow[str1.length];
}

/**
 * Select the best master expense from a group.
 *
 * Picks the expense with the highest completeness score; on ties the earliest
 * one in the array wins. Callers are expected to pass a non-empty array.
 */
function selectMasterExpense(expenses: any[]) {
  let best = expenses[0];
  let bestScore = best === undefined ? 0 : calculateExpenseCompletenessScore(best);

  // Score each expense once instead of re-scoring the running best every step.
  for (let i = 1; i < expenses.length; i++) {
    const currentScore = calculateExpenseCompletenessScore(expenses[i]);
    if (currentScore > bestScore) {
      best = expenses[i];
      bestScore = currentScore;
    }
  }

  return best;
}

/**
 * Calculate completeness score for an expense.
 *
 * The base score is the fraction of the six tracked fields that are filled, plus
 * 0.2 when `Contents` is longer than 10 characters and 0.1 when `CreatedAt` is
 * less than 24 hours old. The result is capped at 1.0.
 */
function calculateExpenseCompletenessScore(expense: any): number {
  const fields = ['Establishment Name', 'Price', 'Payer', 'Category', 'Contents', 'Time'];

  // A field counts as filled when it is neither null/undefined nor blank text.
  // A plain truthiness test would wrongly treat a Price of 0 as missing.
  const filledFields = fields.filter(field => {
    const value = expense[field];
    return value != null && String(value).trim().length > 0;
  });

  let score = filledFields.length / fields.length;

  // Bonus for having contents description
  if (expense.Contents && expense.Contents.length > 10) {
    score += 0.2;
  }

  // Bonus for recent creation (more likely to be accurate)
  if (expense.CreatedAt) {
    const created = new Date(expense.CreatedAt);
    const now = new Date();
    const hoursOld = (now.getTime() - created.getTime()) / (1000 * 60 * 60);

    if (hoursOld < 24) score += 0.1;
  }

  return Math.min(score, 1.0);
}