// Server API route: find likely duplicate expenses and payer-name variations.
import { requireSalesOrAdmin } from '~/server/utils/auth';
|
|
import { getNocoDbConfiguration, normalizePersonName } from '~/server/utils/nocodb';
|
|
import type { Expense } from '~/utils/types';
|
|
|
|
/** Look-back window, in days, used when `dateRange` is missing or unusable. */
const DEFAULT_DATE_RANGE_DAYS = 30;

/** Largest accepted look-back window; keeps the computed start date a valid `Date`. */
const MAX_DATE_RANGE_DAYS = 36500;

/**
 * Turn the raw `dateRange` query value into a whole number of days.
 *
 * Accepts a single value or a repeated parameter (first entry wins). Anything
 * that does not parse to an integer in `[0, MAX_DATE_RANGE_DAYS]` falls back to
 * `DEFAULT_DATE_RANGE_DAYS`, so a malformed query can never produce an invalid
 * `Date` (whose `toISOString()` would throw).
 */
function resolveDateRangeDays(rawDateRange: unknown): number {
  const candidate = Array.isArray(rawDateRange) ? rawDateRange[0] : rawDateRange;
  const days = Number.parseInt(String(candidate ?? ''), 10);
  const usable = Number.isInteger(days) && days >= 0 && days <= MAX_DATE_RANGE_DAYS;
  return usable ? days : DEFAULT_DATE_RANGE_DAYS;
}

/**
 * Request handler: load recent expenses from NocoDB and report likely
 * duplicates plus payer-name variations.
 *
 * Query parameters:
 * - `dateRange`: number of days to look back from now (defaults to 30).
 *
 * Responds with `{ success, data }` where `data` holds the duplicate groups,
 * the payer variations, the number of expenses analysed, the number of
 * expenses that belong to a duplicate group, and the start/end dates of the
 * window (UTC calendar dates, `YYYY-MM-DD`).
 *
 * @throws 403 when the caught error carries `statusCode === 403`.
 * @throws 500 for every other failure.
 */
export default defineEventHandler(async (event) => {
  console.log('[EXPENSES] Find duplicate expenses request');

  try {
    // Require sales or admin access
    await requireSalesOrAdmin(event);

    const query = getQuery(event);
    const days = resolveDateRangeDays(query.dateRange);

    // Calculate date range
    const endDate = new Date();
    const startDate = new Date(endDate);
    startDate.setDate(startDate.getDate() - days);

    // Both bounds are sent and reported as UTC calendar dates.
    const startDay = startDate.toISOString().split('T')[0];
    const endDay = endDate.toISOString().split('T')[0];

    // Get expenses from NocoDB
    const config = getNocoDbConfiguration();
    const expenseTableId = "mxfcefkk4dqs6uq";

    // NOTE(review): only a single page is requested; if the NocoDB instance caps
    // the page size below 5000, older rows are silently left out — confirm.
    const response = await $fetch(`${config.url}/api/v2/tables/${expenseTableId}/records`, {
      headers: {
        'xc-token': config.token
      },
      params: {
        limit: 5000,
        where: `(Time,gte,${startDay})~and(Time,lte,${endDay})`,
        sort: '-Time'
      }
    }) as any;

    const expenses: any[] = Array.isArray(response?.list) ? response.list : [];
    console.log('[EXPENSES] Analyzing', expenses.length, 'expenses for duplicates');

    // Find duplicate groups
    const duplicateGroups = findDuplicateExpenses(expenses);

    // Also find payer name variations
    const payerVariations = findPayerNameVariations(expenses);

    console.log('[EXPENSES] Found', duplicateGroups.length, 'duplicate groups and', payerVariations.length, 'payer variations');

    return {
      success: true,
      data: {
        duplicateGroups,
        payerVariations,
        totalExpenses: expenses.length,
        duplicateCount: duplicateGroups.reduce((sum, group) => sum + group.expenses.length, 0),
        dateRange: {
          start: startDay,
          end: endDay
        }
      }
    };
  } catch (error: unknown) {
    console.error('[EXPENSES] Failed to find duplicates:', error);

    const statusCode = (error as { statusCode?: number } | null | undefined)?.statusCode;

    // NOTE(review): any error carrying statusCode 403 is reported as an access
    // failure, and every other status (including a possible 401 from the auth
    // check) becomes a 500 — confirm this is intended.
    if (statusCode === 403) {
      throw createError({
        statusCode: 403,
        statusMessage: 'Access denied. Sales or admin role required.'
      });
    }

    throw createError({
      statusCode: 500,
      statusMessage: 'Failed to find duplicate expenses'
    });
  }
});
|
|
|
|
/**
 * Group expenses that look like duplicates of one another.
 *
 * Grouping is greedy: each not-yet-grouped expense acts as an anchor and is
 * compared with every later not-yet-grouped expense; those scoring at least
 * 0.7 join the anchor's group. Group members are only compared with the
 * anchor, not with each other.
 *
 * @param expenses Expense records; each is expected to carry an `Id`.
 * @returns One entry per group of two or more expenses, with the joined match
 *   reasons, the chosen master record, and `confidence` — the best similarity
 *   score between the master and any *other* member of the group.
 */
function findDuplicateExpenses(expenses: any[]) {
  console.log('[EXPENSES] Starting duplicate detection for', expenses.length, 'expenses');

  // Lower threshold for expenses
  const duplicateThreshold = 0.7;

  const duplicateGroups: Array<{
    id: string;
    expenses: any[];
    matchReason: string;
    confidence: number;
    masterCandidate: any;
  }> = [];

  const processedIds = new Set<number>();
  let comparisons = 0;

  for (let i = 0; i < expenses.length; i++) {
    const anchor = expenses[i];

    if (processedIds.has(anchor.Id)) continue;

    const matches = [anchor];
    const matchReasons = new Set<string>();

    for (let j = i + 1; j < expenses.length; j++) {
      const candidate = expenses[j];

      if (processedIds.has(candidate.Id)) continue;

      const similarity = calculateExpenseSimilarity(anchor, candidate);
      comparisons++;

      // Only matches are logged: logging every pair would emit O(n^2) lines.
      if (similarity.score >= duplicateThreshold) {
        console.log(`[EXPENSES] MATCH FOUND! ${anchor.Id} vs ${candidate.Id} (score: ${similarity.score.toFixed(3)})`);
        console.log('[EXPENSES] Match reasons:', similarity.reasons);
        matches.push(candidate);
        processedIds.add(candidate.Id);
        similarity.reasons.forEach(reason => matchReasons.add(reason));
      }
    }

    if (matches.length > 1) {
      // The matched candidates were marked above; the anchor is the only one left.
      processedIds.add(anchor.Id);

      // Determine the best master candidate
      const masterCandidate = selectMasterExpense(matches);

      // Score the master against every other member. Excluding the master itself
      // matters: it need not be the first element, and a self-comparison would
      // report a near-perfect confidence regardless of the real matches.
      const peerScores = matches
        .filter(match => match !== masterCandidate)
        .map(match => calculateExpenseSimilarity(masterCandidate, match).score);

      duplicateGroups.push({
        id: `group_${duplicateGroups.length + 1}`,
        expenses: matches,
        matchReason: Array.from(matchReasons).join(', '),
        confidence: peerScores.length > 0 ? Math.max(...peerScores) : 0,
        masterCandidate
      });
    }
  }

  console.log('[EXPENSES] Duplicate detection finished:', comparisons, 'comparisons,', duplicateGroups.length, 'groups');

  return duplicateGroups;
}
|
|
|
|
/**
 * Find payer name variations (like "Abbie" vs "abbie").
 *
 * Payers are bucketed by `normalizePersonName(expense.Payer)`; a bucket is
 * reported when it contains more than one distinct raw spelling. Expenses
 * without a truthy `Payer` are ignored.
 *
 * @param expenses Expense records to scan.
 * @returns One entry per normalized name with several spellings, carrying the
 *   spellings in first-seen order and the number of expenses in the bucket,
 *   sorted by that count in descending order.
 */
function findPayerNameVariations(expenses: any[]) {
  // Spellings and expense count are gathered in one pass, so the list never has
  // to be re-filtered (and re-normalized) once per reported name.
  const payerMap = new Map<string, { spellings: Set<string>; expenseCount: number }>();

  // Group payers by normalized name
  for (const expense of expenses) {
    if (!expense.Payer) continue;

    const normalized = normalizePersonName(expense.Payer);
    let bucket = payerMap.get(normalized);
    if (bucket === undefined) {
      bucket = { spellings: new Set<string>(), expenseCount: 0 };
      payerMap.set(normalized, bucket);
    }

    bucket.spellings.add(expense.Payer);
    bucket.expenseCount += 1;
  }

  // Find variations
  const variations: Array<{
    normalizedName: string;
    variations: string[];
    expenseCount: number;
  }> = [];

  for (const [normalized, bucket] of payerMap) {
    if (bucket.spellings.size > 1) {
      variations.push({
        normalizedName: normalized,
        variations: Array.from(bucket.spellings),
        expenseCount: bucket.expenseCount
      });
    }
  }

  return variations.sort((a, b) => b.expenseCount - a.expenseCount);
}
|
|
|
|
/**
 * Reduce an expense timestamp to its calendar-date prefix.
 *
 * Handles both `2024-01-15T10:00:00Z` and `2024-01-15 10:00:00+00:00` style
 * strings by cutting at the first `T` or space. Missing values yield
 * `undefined`; other non-string values are compared by their string form.
 */
function extractExpenseDay(time: unknown): string | undefined {
  if (time === null || time === undefined) return undefined;
  if (typeof time !== 'string') return String(time);
  return time.split(/[T ]/)[0];
}

/**
 * Calculate similarity between two expenses.
 *
 * Four independent rules are evaluated; each rule that fires contributes a
 * score and a weight, and the result is the weighted average over the rules
 * that fired (0 when none did):
 * - exact: same establishment, price and timestamp (1.0, weight 0.5)
 * - same-day: same normalized payer, establishment, price and day (0.95, weight 0.4)
 * - similar: establishment similarity above 0.8 with same price and payer
 *   (the similarity value, weight 0.3)
 * - time-proximity: same establishment less than 5 minutes apart (0.9, weight 0.2)
 *
 * @returns `score` in [0, 1], human-readable `reasons`, and the per-rule `details`.
 */
function calculateExpenseSimilarity(expense1: any, expense2: any) {
  const scores: Array<{ type: string; score: number; weight: number }> = [];
  const reasons: string[] = [];

  const name1 = expense1['Establishment Name'];
  const name2 = expense2['Establishment Name'];
  const sameEstablishment = name1 === name2;
  const samePrice = expense1.Price === expense2.Price;
  const samePayer = normalizePersonName(expense1.Payer) === normalizePersonName(expense2.Payer);

  // Exact match on establishment, price, and date (highest weight for true duplicates)
  if (sameEstablishment && samePrice && expense1.Time === expense2.Time) {
    scores.push({ type: 'exact', score: 1.0, weight: 0.5 });
    reasons.push('Exact match');
  }

  // Same payer, establishment, and price on same day (likely duplicate).
  // The day is taken from the timestamp prefix so that both "T"- and
  // space-separated timestamps compare by date rather than by full timestamp.
  const sameDay = extractExpenseDay(expense1.Time) === extractExpenseDay(expense2.Time);

  if (samePayer && sameEstablishment && samePrice && sameDay) {
    scores.push({ type: 'same-day', score: 0.95, weight: 0.4 });
    reasons.push('Same person, place, amount on same day');
  }

  // Similar establishment names with same price and payer
  if (name1 && name2) {
    const nameSimilarity = calculateStringSimilarity(name1, name2);

    if (nameSimilarity > 0.8 && samePrice && samePayer) {
      scores.push({ type: 'similar', score: nameSimilarity, weight: 0.3 });
      reasons.push('Similar establishment name');
    }
  }

  // Time proximity check (within 5 minutes)
  if (expense1.Time && expense2.Time) {
    const time1 = new Date(expense1.Time).getTime();
    const time2 = new Date(expense2.Time).getTime();
    // An unparseable timestamp gives NaN here, which fails the comparison below.
    const timeDiff = Math.abs(time1 - time2);

    if (timeDiff < 5 * 60 * 1000 && sameEstablishment) {
      scores.push({ type: 'time-proximity', score: 0.9, weight: 0.2 });
      reasons.push('Within 5 minutes at same establishment');
    }
  }

  // Calculate weighted average
  const totalWeight = scores.reduce((sum, s) => sum + s.weight, 0);
  const weightedScore = totalWeight > 0
    ? scores.reduce((sum, s) => sum + (s.score * s.weight), 0) / totalWeight
    : 0;

  return {
    score: weightedScore,
    reasons,
    details: scores
  };
}
|
|
|
|
/**
 * Score how alike two strings are, on a 0..1 scale.
 *
 * Both inputs are lower-cased and trimmed first; identical results score 1.
 * Otherwise the score is one minus the edit distance (from
 * `levenshteinDistance`) divided by the length of the longer string.
 */
function calculateStringSimilarity(str1: string, str2: string): number {
  const normalize = (text: string): string => text.toLowerCase().trim();

  const left = normalize(str1);
  const right = normalize(str2);

  if (left === right) {
    return 1.0;
  }

  const edits = levenshteinDistance(left, right);
  const longest = Math.max(left.length, right.length);

  if (longest > 0) {
    return 1 - (edits / longest);
  }
  return 0;
}
|
|
|
|
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn
 * `str1` into `str2`. Characters are compared by string index.
 */
function levenshteinDistance(str1: string, str2: string): number {
  const width = str1.length;
  const height = str2.length;

  // Row for the empty prefix of str2: the cost of building each prefix of str1.
  let previousRow: number[] = Array.from({ length: width + 1 }, (_, column) => column);

  for (let row = 1; row <= height; row += 1) {
    const currentRow: number[] = new Array<number>(width + 1);
    currentRow[0] = row;

    for (let column = 1; column <= width; column += 1) {
      const substitutionCost = str1[column - 1] === str2[row - 1] ? 0 : 1;
      const viaLeft = currentRow[column - 1] + 1;
      const viaAbove = previousRow[column] + 1;
      const viaDiagonal = previousRow[column - 1] + substitutionCost;

      currentRow[column] = Math.min(viaLeft, viaAbove, viaDiagonal);
    }

    previousRow = currentRow;
  }

  return previousRow[width];
}
|
|
|
|
/**
 * Select the best master expense from a group.
 *
 * The winner is the expense with the highest value from
 * `calculateExpenseCompletenessScore`; on a tie the earliest one is kept.
 *
 * @param expenses Candidate expenses.
 * @returns The chosen expense, or `undefined` when the list is empty.
 */
function selectMasterExpense(expenses: any[]) {
  if (expenses.length === 0) {
    return undefined;
  }

  // Each expense is scored once; the best score so far is carried along.
  let best = expenses[0];
  let bestScore = calculateExpenseCompletenessScore(best);

  for (let index = 1; index < expenses.length; index += 1) {
    const candidate = expenses[index];
    const candidateScore = calculateExpenseCompletenessScore(candidate);

    if (candidateScore > bestScore) {
      best = candidate;
      bestScore = candidateScore;
    }
  }

  return best;
}
|
|
|
|
/**
 * Calculate completeness score for an expense.
 *
 * The base score is the share of these six fields that are filled in:
 * Establishment Name, Price, Payer, Category, Contents, Time. A field counts
 * as filled when its value is truthy or the number 0, and its string form is
 * not blank. Two bonuses are added before the result is capped at 1.0:
 * +0.2 when `Contents` is longer than 10 characters, and +0.1 when
 * `CreatedAt` is less than 24 hours old.
 *
 * @param expense Expense record to score.
 * @returns A score between 0 and 1.
 */
function calculateExpenseCompletenessScore(expense: any): number {
  const fields = ['Establishment Name', 'Price', 'Payer', 'Category', 'Contents', 'Time'];

  const filledFields = fields.filter(field => {
    const value = expense[field];
    // A numeric 0 (e.g. a zero price) is a real value, not a missing one.
    const present = Boolean(value) || value === 0;
    return present && String(value).trim().length > 0;
  });

  let score = filledFields.length / fields.length;

  // Bonus for having contents description
  if (expense.Contents && expense.Contents.length > 10) {
    score += 0.2;
  }

  // Bonus for recent creation (more likely to be accurate)
  if (expense.CreatedAt) {
    const created = new Date(expense.CreatedAt);
    const now = new Date();
    const hoursOld = (now.getTime() - created.getTime()) / (1000 * 60 * 60);

    // An unparseable CreatedAt gives NaN, which fails this comparison.
    if (hoursOld < 24) score += 0.1;
  }

  return Math.min(score, 1.0);
}
|