203 lines
6.4 KiB
TypeScript
203 lines
6.4 KiB
TypeScript
|
|
/**
 * PR8 — expense duplicate detection.
 *
 * Validates:
 *   1. `scanForDuplicates` matches by port + lower(vendor) + amount + date ±3d
 *   2. Same-day matches score 1.0; off-by-N-days score 0.85
 *   3. `markBestDuplicate` writes the highest-confidence match into `duplicateOf`
 *   4. `clearDuplicate` nulls `duplicateOf` but keeps `dedupScannedAt`
 *   5. `mergeDuplicate` consolidates receipts + archives the source row
 *   6. `mergeDuplicate` rejects self-merges and cross-port merges
 */
|
||
|
|
|
||
|
|
import { describe, it, expect } from 'vitest';
|
||
|
|
import { eq } from 'drizzle-orm';
|
||
|
|
|
||
|
|
import { db } from '@/lib/db';
|
||
|
|
import { expenses } from '@/lib/db/schema/financial';
|
||
|
|
import {
|
||
|
|
scanForDuplicates,
|
||
|
|
markBestDuplicate,
|
||
|
|
clearDuplicate,
|
||
|
|
mergeDuplicate,
|
||
|
|
} from '@/lib/services/expense-dedup.service';
|
||
|
|
import { makePort } from '../helpers/factories';
|
||
|
|
|
||
|
|
/**
 * Test helper: inserts a single row into `expenses` and returns the stored record.
 *
 * Every seeded row uses currency `'USD'` and `createdBy: 'seed'`; when
 * `receiptFileIds` is omitted the row is written with an empty list.
 *
 * @param args - Port, vendor name, amount, date and optional receipt file ids
 *   for the row to insert.
 * @returns The first row handed back by the insert's `.returning()` clause.
 */
async function seedExpense(args: {
  portId: string;
  establishmentName: string;
  amount: string;
  expenseDate: Date;
  receiptFileIds?: string[];
}) {
  const { portId, establishmentName, amount, expenseDate } = args;

  const inserted = await db
    .insert(expenses)
    .values({
      portId,
      establishmentName,
      amount,
      currency: 'USD',
      expenseDate,
      receiptFileIds: args.receiptFileIds ?? [],
      createdBy: 'seed',
    })
    .returning();

  // Exactly one row was inserted, so the first returned element is that row.
  return inserted[0]!;
}
|
||
|
|
|
||
|
|
describe('expense dedup', () => {
  // Each test creates its own port(s) via `makePort()` and seeds its own rows.

  it('scanForDuplicates finds matches in the ±3 day window with case-insensitive vendor', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: today,
    });
    // Match: same vendor (different case), same amount, +2 days
    await seedExpense({
      portId: port.id,
      establishmentName: 'marina fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-17T09:00:00Z'),
    });
    // Non-match: outside the window (+7 days)
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-22T09:00:00Z'),
    });
    // Non-match: different amount
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '125.00',
      expenseDate: today,
    });

    const matches = await scanForDuplicates(target.id);
    expect(matches).toHaveLength(1);
    // The only match is two days off, so it gets the off-by-N-days score.
    expect(matches[0]?.confidence).toBeCloseTo(0.85, 2);
  });

  it('same-day match scores 1.0', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });
    // Identical vendor, amount and timestamp.
    await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });

    const [m] = await scanForDuplicates(target.id);
    expect(m?.confidence).toBe(1.0);
  });

  it('markBestDuplicate writes duplicateOf when a candidate exists, leaves null otherwise', async () => {
    const port = await makePort();

    // Case 1: no other expense in the port, so there is nothing to match.
    const lonely = await seedExpense({
      portId: port.id,
      establishmentName: 'Solo',
      amount: '10',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const matchedId = await markBestDuplicate(lonely.id);
    expect(matchedId).toBeNull();
    const [refreshed] = await db.select().from(expenses).where(eq(expenses.id, lonely.id));
    expect(refreshed?.duplicateOf).toBeNull();
    // The scan timestamp is recorded even when no duplicate was found.
    expect(refreshed?.dedupScannedAt).not.toBeNull();

    // Case 2: a same-day twin exists, so the scan must point at it.
    const original = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const dup = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });
    const matched = await markBestDuplicate(dup.id);
    expect(matched).toBe(original.id);
    const [withDup] = await db.select().from(expenses).where(eq(expenses.id, dup.id));
    expect(withDup?.duplicateOf).toBe(original.id);
  });

  it('clearDuplicate nulls duplicateOf but preserves dedupScannedAt', async () => {
    const port = await makePort();
    const a = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });

    // Precondition: `b` must actually be flagged as a duplicate of `a` before
    // clearing; otherwise the null check below would pass without
    // `clearDuplicate` having done anything.
    const flaggedId = await markBestDuplicate(b.id);
    expect(flaggedId).toBe(a.id);
    const [flagged] = await db.select().from(expenses).where(eq(expenses.id, b.id));
    expect(flagged?.duplicateOf).toBe(a.id);

    await clearDuplicate(b.id, port.id);

    const [refreshed] = await db.select().from(expenses).where(eq(expenses.id, b.id));
    expect(refreshed?.duplicateOf).toBeNull();
    expect(refreshed?.dedupScannedAt).not.toBeNull();
  });

  it('mergeDuplicate combines receipts and archives the source', async () => {
    const port = await makePort();
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
      receiptFileIds: ['file-A'],
    });
    // The source shares 'file-A' with the target and adds 'file-B'.
    const source = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
      receiptFileIds: ['file-B', 'file-A'],
    });

    await mergeDuplicate(source.id, target.id, port.id);

    // Compared as sets: the assertion is about which receipts the survivor
    // holds, not their order or multiplicity.
    const [survivor] = await db.select().from(expenses).where(eq(expenses.id, target.id));
    expect(new Set(survivor?.receiptFileIds ?? [])).toEqual(new Set(['file-A', 'file-B']));

    const [archived] = await db.select().from(expenses).where(eq(expenses.id, source.id));
    expect(archived?.archivedAt).not.toBeNull();
    expect(archived?.duplicateOf).toBeNull();
  });

  it('mergeDuplicate refuses self-merge and cross-port', async () => {
    const portA = await makePort();
    const portB = await makePort();
    const a = await seedExpense({
      portId: portA.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: portB.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });

    // Same id as both source and target.
    await expect(mergeDuplicate(a.id, a.id, portA.id)).rejects.toThrow(/itself/);
    // `b` was seeded under portB, but the merge is requested for portA.
    await expect(mergeDuplicate(a.id, b.id, portA.id)).rejects.toThrow(/not found/);
  });
});
|