Files
pn-new-crm/tests/integration/expense-dedup.test.ts
Matt Ciaccio f52d21df83 feat(phase-b): ship analytics dashboard, alerts, scanner PWA, dedup, audit view
Phase B (Insights & Alerts) PR4-11 in one drop. Builds on the schema +
service skeletons committed in PRs 1-3.

PR4  Analytics dashboard — 4 chart types (funnel/timeline/breakdown/source),
     date-range picker (today/7d/30d/90d), CSV+PNG export per card.
PR5  Alert rail UI + /alerts page — topbar bell w/ live count, dashboard
     right-rail, three-tab page (active/dismissed/resolved), socket-driven
     invalidation. Bell lazy-loads list on popover open to keep cold pages
     fast in non-dashboard routes.
PR6  EOI queue tab on documents hub — filters to in-flight EOIs, count
     surfaces in tab label.
PR7  Interests-by-berth tab on berth detail — replaces the stub.
PR8  Expense duplicate detection — BullMQ job runs scan on create, yellow
     banner on detail w/ Merge / Not-a-duplicate, transactional merge
     consolidates receipts and archives the source.
PR9  Receipt scanner PWA + multi-provider AI — port-scoped /scan route in
     its own (scanner) group with no dashboard chrome, dynamic per-port
     manifest, OpenAI + Claude provider abstraction, admin OCR settings
     page (port-level + super-admin global default w/ opt-in fallback),
     test-connection endpoint, manual-entry fallback when no key is
     configured. Verify form always shown before save — no ghost rows.
PR10 Audit log read view — swap to tsvector full-text search on the
     existing GIN index, cursor pagination, filters for entity, action,
     user, and date range, batched actor-email resolution.
PR11 Real-API tests — opt-in receipt-ocr.spec (admin save+test, optional
     real-receipt parse via REALAPI_RECEIPT_FIXTURE) and alert-engine
     socket-fanout spec gated behind RUN_ALERT_ENGINE_REALAPI. Both skip
     cleanly without their gate envs so CI stays green.

Test totals: vitest 690 -> 713, smoke 130 -> 138, realapi +2 opt-in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:21:55 +02:00

203 lines
6.4 KiB
TypeScript

/**
* PR8 — expense duplicate detection.
*
* Validates:
* 1. `scanForDuplicates` matches by port + lower(vendor) + amount + date ±3d
* 2. Same-day matches score 1.0; off-by-N-days score 0.85
* 3. `markBestDuplicate` writes the highest-confidence match into `duplicateOf`
* 4. `clearDuplicate` nulls `duplicateOf` but keeps `dedupScannedAt`
* 5. `mergeDuplicate` consolidates receipts + archives the source row
* 6. `mergeDuplicate` rejects merging a row into itself and merging across ports
*/
import { describe, it, expect } from 'vitest';
import { eq } from 'drizzle-orm';
import { db } from '@/lib/db';
import { expenses } from '@/lib/db/schema/financial';
import {
scanForDuplicates,
markBestDuplicate,
clearDuplicate,
mergeDuplicate,
} from '@/lib/services/expense-dedup.service';
import { makePort } from '../helpers/factories';
/**
 * Inserts a single row into `expenses` for test setup and returns it.
 *
 * Every seeded row is written with currency `'USD'` and `createdBy: 'seed'`;
 * `receiptFileIds` falls back to an empty array when the caller omits it.
 *
 * @param args - The column values that vary between tests.
 * @returns The inserted row as reported back by `.returning()`.
 * @throws Error when the insert reports no row, so a broken fixture fails at
 *   the seeding step instead of surfacing later as an `undefined` access.
 */
async function seedExpense(args: {
  portId: string;
  establishmentName: string;
  amount: string;
  expenseDate: Date;
  receiptFileIds?: string[];
}) {
  const [row] = await db
    .insert(expenses)
    .values({
      portId: args.portId,
      establishmentName: args.establishmentName,
      amount: args.amount,
      currency: 'USD',
      expenseDate: args.expenseDate,
      receiptFileIds: args.receiptFileIds ?? [],
      createdBy: 'seed',
    })
    .returning();
  // Guard instead of a non-null assertion: the destructured element is
  // possibly undefined as far as the type checker is concerned.
  if (!row) {
    throw new Error('seedExpense: insert into expenses returned no row');
  }
  return row;
}
/**
 * Integration tests for the expense dedup service.
 *
 * Each test creates its own port through `makePort()` and seeds only the rows
 * it needs, so the assertions on match counts depend solely on the rows
 * created inside that test (matching is port-scoped per the file header).
 */
describe('expense dedup', () => {
  it('scanForDuplicates finds matches in the ±3 day window with case-insensitive vendor', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: today,
    });
    // Match: same vendor (different case), same amount, +2 days
    await seedExpense({
      portId: port.id,
      establishmentName: 'marina fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-17T09:00:00Z'),
    });
    // Non-match: outside the window
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '120.00',
      expenseDate: new Date('2026-04-22T09:00:00Z'),
    });
    // Non-match: different amount
    await seedExpense({
      portId: port.id,
      establishmentName: 'Marina Fuel',
      amount: '125.00',
      expenseDate: today,
    });

    const matches = await scanForDuplicates(target.id);

    // Only the +2 day row qualifies, and an off-day match scores 0.85.
    expect(matches).toHaveLength(1);
    expect(matches[0]?.confidence).toBeCloseTo(0.85, 2);
  });

  it('same-day match scores 1.0', async () => {
    const port = await makePort();
    const today = new Date('2026-04-15T12:00:00Z');
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });
    await seedExpense({
      portId: port.id,
      establishmentName: 'Acme',
      amount: '50',
      expenseDate: today,
    });

    const matches = await scanForDuplicates(target.id);

    // Exactly one other row was seeded in this port, so pin the count as well
    // as the score; otherwise an unexpected extra match would go unnoticed.
    expect(matches).toHaveLength(1);
    expect(matches[0]?.confidence).toBe(1.0);
  });

  it('markBestDuplicate writes duplicateOf when a candidate exists, leaves null otherwise', async () => {
    const port = await makePort();

    // Case 1: no candidate exists — nothing is flagged, but the scan is recorded.
    const lonely = await seedExpense({
      portId: port.id,
      establishmentName: 'Solo',
      amount: '10',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const matchedId = await markBestDuplicate(lonely.id);
    expect(matchedId).toBeNull();
    const [refreshed] = await db.select().from(expenses).where(eq(expenses.id, lonely.id));
    expect(refreshed?.duplicateOf).toBeNull();
    expect(refreshed?.dedupScannedAt).not.toBeNull();

    // Case 2: a same-vendor, same-amount row one hour earlier is the candidate.
    const original = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const dup = await seedExpense({
      portId: port.id,
      establishmentName: 'Twin',
      amount: '20',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });
    const matched = await markBestDuplicate(dup.id);
    expect(matched).toBe(original.id);
    const [withDup] = await db.select().from(expenses).where(eq(expenses.id, dup.id));
    expect(withDup?.duplicateOf).toBe(original.id);
  });

  it('clearDuplicate nulls duplicateOf but preserves dedupScannedAt', async () => {
    const port = await makePort();
    const a = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: port.id,
      establishmentName: 'X',
      amount: '5',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
    });

    // Precondition: the flag must really be set before it is cleared.
    // Without this check the test would also pass if flagging silently did
    // nothing, because duplicateOf would already be null.
    expect(await markBestDuplicate(b.id)).toBe(a.id);
    const [flagged] = await db.select().from(expenses).where(eq(expenses.id, b.id));
    expect(flagged?.duplicateOf).toBe(a.id);

    await clearDuplicate(b.id, port.id);

    const [refreshed] = await db.select().from(expenses).where(eq(expenses.id, b.id));
    expect(refreshed?.duplicateOf).toBeNull();
    expect(refreshed?.dedupScannedAt).not.toBeNull();
  });

  it('mergeDuplicate combines receipts and archives the source', async () => {
    const port = await makePort();
    const target = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
      receiptFileIds: ['file-A'],
    });
    // The source deliberately shares 'file-A' with the target.
    const source = await seedExpense({
      portId: port.id,
      establishmentName: 'Y',
      amount: '7',
      expenseDate: new Date('2026-04-15T13:00:00Z'),
      receiptFileIds: ['file-B', 'file-A'],
    });

    await mergeDuplicate(source.id, target.id, port.id);

    const [survivor] = await db.select().from(expenses).where(eq(expenses.id, target.id));
    // NOTE(review): comparing as Sets checks membership only; it would not
    // notice 'file-A' being stored twice on the survivor. Tighten to an
    // ordered/sorted array comparison if the service is meant to de-duplicate.
    expect(new Set(survivor?.receiptFileIds ?? [])).toEqual(new Set(['file-A', 'file-B']));

    const [archived] = await db.select().from(expenses).where(eq(expenses.id, source.id));
    expect(archived?.archivedAt).not.toBeNull();
    expect(archived?.duplicateOf).toBeNull();
  });

  it('mergeDuplicate refuses self-merge and cross-port', async () => {
    const portA = await makePort();
    const portB = await makePort();
    const a = await seedExpense({
      portId: portA.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });
    const b = await seedExpense({
      portId: portB.id,
      establishmentName: 'Z',
      amount: '1',
      expenseDate: new Date('2026-04-15T12:00:00Z'),
    });

    // Same id on both sides is rejected with an "itself" error.
    await expect(mergeDuplicate(a.id, a.id, portA.id)).rejects.toThrow(/itself/);
    // `b` lives in portB, so a merge requested under portA reports "not found".
    await expect(mergeDuplicate(a.id, b.id, portA.id)).rejects.toThrow(/not found/);
  });
});