/**
 * PR8 — expense duplicate detection.
 *
 * Validates:
 *   1. `scanForDuplicates` matches by port + lower(vendor) + amount + date ±3d
 *   2. Same-day matches score 1.0; off-by-N-days score 0.85
 *   3. `markBestDuplicate` writes the highest-confidence match into `duplicateOf`
 *   4. `clearDuplicate` nulls `duplicateOf` but keeps `dedupScannedAt`
 *   5. `mergeDuplicate` consolidates receipts + archives the source row
 */
import { describe, it, expect } from 'vitest';
import { eq } from 'drizzle-orm';

import { db } from '@/lib/db';
import { expenses } from '@/lib/db/schema/financial';
import {
  scanForDuplicates,
  markBestDuplicate,
  clearDuplicate,
  mergeDuplicate,
} from '@/lib/services/expense-dedup.service';
import { makePort } from '../helpers/factories';

/**
 * Inserts one expense row for a test and returns the persisted record.
 *
 * Currency is always 'USD' and `createdBy` is always 'seed'; `receiptFileIds`
 * defaults to an empty array when omitted.
 *
 * @throws Error when the insert does not return a row.
 */
async function seedExpense(args: {
  portId: string;
  establishmentName: string;
  amount: string;
  expenseDate: Date;
  receiptFileIds?: string[];
}) {
  const [row] = await db
    .insert(expenses)
    .values({
      portId: args.portId,
      establishmentName: args.establishmentName,
      amount: args.amount,
      currency: 'USD',
      expenseDate: args.expenseDate,
      receiptFileIds: args.receiptFileIds ?? [],
      createdBy: 'seed',
    })
    .returning();

  // Fail loudly here rather than letting an undefined row surface later as a
  // confusing property-access error inside a test body.
  if (!row) {
    throw new Error('seedExpense: insert returned no row');
  }
  return row;
}

/**
 * Re-reads a single expense by primary key.
 *
 * @returns the current row, or `undefined` when no row has that id.
 */
async function loadExpense(id: string) {
  const [row] = await db.select().from(expenses).where(eq(expenses.id, id));
  return row;
}

describe('expense dedup', () => {
  it('scanForDuplicates finds matches in the ±3 day window with case-insensitive vendor', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: today,
    });

    // Match: same vendor (different case), same amount, +2 days
    await seedExpense({
      portId: port.id,
      establishmentName: 'marina fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-17T09:00:00Z'),
    });

    // Non-match: outside the window
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-22T09:00:00Z'),
    });

    // Non-match: different amount
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '125.00',
      expenseDate: today,
    });

    const matches = await scanForDuplicates(target.id);

    // Only the +2 day row qualifies, and an off-day match scores 0.85.
    expect(matches).toHaveLength(1);
    expect(matches[0]?.confidence).toBeCloseTo(0.85, 2);
  });

  it('same-day match scores 1.0', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });
    await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });

    const [m] = await scanForDuplicates(target.id);
    expect(m?.confidence).toBe(1.0);
  });

  it('markBestDuplicate writes duplicateOf when a candidate exists, leaves null otherwise', async () => {
    const port = await makePort();

    // Case 1: no candidate — nothing is linked, but the scan is still stamped.
    const lonely = await seedExpense({
      portId: port.id,
      establishmentName: 'Solo',
      amount: '10',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const matchedId = await markBestDuplicate(lonely.id);
    expect(matchedId).toBeNull();

    const refreshed = await loadExpense(lonely.id);
    expect(refreshed?.duplicateOf).toBeNull();
    expect(refreshed?.dedupScannedAt).not.toBeNull();

    // Case 2: a same-day twin exists — the scanned row is linked to it.
    const original = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const dup = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });
    const matched = await markBestDuplicate(dup.id);
    expect(matched).toBe(original.id);

    const withDup = await loadExpense(dup.id);
    expect(withDup?.duplicateOf).toBe(original.id);
  });

  it('clearDuplicate nulls duplicateOf but preserves dedupScannedAt', async () => {
    const port = await makePort();
    const a = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });

    // Precondition: the scan must really have linked b -> a. Without this
    // check the assertions below would also hold for a scan that found no
    // candidate, and the test would pass even if clearDuplicate did nothing.
    const linkedTo = await markBestDuplicate(b.id);
    expect(linkedTo).toBe(a.id);

    await clearDuplicate(b.id, port.id);

    const refreshed = await loadExpense(b.id);
    expect(refreshed?.duplicateOf).toBeNull();
    expect(refreshed?.dedupScannedAt).not.toBeNull();
  });

  it('mergeDuplicate combines receipts and archives the source', async () => {
    const port = await makePort();
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
      receiptFileIds: ['file-A'],
    });
    // The source shares 'file-A' with the target and adds 'file-B'.
    const source = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
      receiptFileIds: ['file-B', 'file-A'],
    });

    await mergeDuplicate(source.id, target.id, port.id);

    // Compared as sets: ordering and repeated ids are not asserted here.
    const survivor = await loadExpense(target.id);
    expect(new Set(survivor?.receiptFileIds ?? [])).toEqual(new Set(['file-A', 'file-B']));

    const archived = await loadExpense(source.id);
    expect(archived?.archivedAt).not.toBeNull();
    expect(archived?.duplicateOf).toBeNull();
  });

  it('mergeDuplicate refuses self-merge and cross-port', async () => {
    const portA = await makePort();
    const portB = await makePort();
    const a = await seedExpense({
      portId: portA.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: portB.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });

    // Same id on both sides.
    await expect(mergeDuplicate(a.id, a.id, portA.id)).rejects.toThrow(/itself/);
    // b belongs to portB while the call is made for portA.
    await expect(mergeDuplicate(a.id, b.id, portA.id)).rejects.toThrow(/not found/);
  });
});