feat(insights): Phase B schema + service skeletons
PR1 of Phase B per docs/superpowers/specs/2026-04-28-phase-b-insights-alerts-design.md. Lays the foundation that PRs 2-10 will fill in with behaviour. Schema (migration 0014): - alerts table with rule-engine fields (rule_id, severity, link, entity_type/id, fingerprint, fired/dismissed/acknowledged/resolved timestamps, jsonb metadata). Partial-unique fingerprint index keeps one open row per (port, rule, entity); separate indexes power severity-filtered and time-ordered queries. - analytics_snapshots (port_id, metric_id) -> jsonb cache + computedAt for the 15-min recurring refresh. - expenses: duplicate_of self-FK, dedup_scanned_at, ocr_status/raw/ confidence; partial index on (port, vendor, amount, date) where duplicate_of IS NULL drives the dedup heuristic. - audit_logs.search_text: GENERATED ALWAYS tsvector over action+entity_type+entity_id+user_id, GIN-indexed (drizzle can't model GENERATED ALWAYS in TS yet, so the migration appends manual ALTER + the GIN index). Service skeletons in src/lib/services/: - alerts.service.ts: fingerprintFor, reconcileAlertsForPort (upsert + auto-resolve), dismiss, acknowledge, listAlertsForPort. - alert-rules.ts: RULE_REGISTRY of 10 rule evaluators (currently no-op); PR2 fills in the bodies. - analytics.service.ts: readSnapshot/writeSnapshot with 15-min TTL + no-op compute* stubs for the four chart series; PR3 fills behavior. - expense-dedup.service.ts: scanForDuplicates + markBestDuplicate using the partial dedup index. PR8 wires the BullMQ trigger. - expense-ocr.service.ts: OcrResult/OcrLineItem types + ocrReceipt stub. PR9 wires Claude Vision (Haiku 4.5 + ephemeral system-prompt cache). - audit-search.service.ts: tsvector @@ plainto_tsquery + cursor pagination on (createdAt, id). PR10 wires the admin UI. tsc clean, lint clean, vitest 675/675 (one unrelated AES random-output flake passes solo). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
71
src/lib/services/expense-dedup.service.ts
Normal file
71
src/lib/services/expense-dedup.service.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Expense duplicate detection — heuristic match on
|
||||
* (port + vendor + amount + date ± 3d). PR1 ships the function shape;
|
||||
* PR8 wires the BullMQ trigger and the merge service.
|
||||
*/
|
||||
|
||||
import { and, between, eq, ne, sql } from 'drizzle-orm';
|
||||
|
||||
import { db } from '@/lib/db';
|
||||
import { expenses } from '@/lib/db/schema/financial';
|
||||
|
||||
/** Number of days searched on each side of an expense's date when looking for duplicates. */
const DEDUP_WINDOW_DAYS = 3;
|
||||
|
||||
/** One possible match produced by the duplicate scan. */
export interface DedupCandidate {
  /** ID of the already-recorded expense that the scanned one appears to repeat. */
  candidateId: string;
  /** Match strength in the range 0..1; 1.0 means vendor, amount, and day all match exactly. */
  confidence: number;
}
|
||||
|
||||
/**
 * Find existing expenses that the given expense probably duplicates.
 *
 * A candidate must belong to the same port, have the same vendor name
 * (compared case-insensitively) and the same amount, be dated within
 * ±DEDUP_WINDOW_DAYS of the target, and must not itself already be flagged
 * as a duplicate of another expense.
 *
 * @param expenseId - ID of the expense to check.
 * @returns At most five candidates. Empty when the expense does not exist or
 *   is missing a vendor name, amount, or date. Confidence is 1.0 when
 *   `dayDiff` reports zero days between the two dates, otherwise 0.85.
 */
export async function scanForDuplicates(expenseId: string): Promise<DedupCandidate[]> {
  const target = await db.query.expenses.findFirst({ where: eq(expenses.id, expenseId) });
  if (!target) return [];

  const { portId, establishmentName, amount, expenseDate } = target;
  if (!establishmentName || !amount || !expenseDate) return [];

  // Shift the bounds in UTC: local-time setDate() moves the instant by an
  // extra hour across a DST change in the server's timezone, which can widen
  // or narrow the window by a calendar day once the value reaches the database.
  const lo = new Date(expenseDate);
  lo.setUTCDate(lo.getUTCDate() - DEDUP_WINDOW_DAYS);
  const hi = new Date(expenseDate);
  hi.setUTCDate(hi.getUTCDate() + DEDUP_WINDOW_DAYS);

  const matches = await db.query.expenses.findMany({
    where: and(
      eq(expenses.portId, portId),
      sql`lower(${expenses.establishmentName}) = lower(${establishmentName})`,
      eq(expenses.amount, amount),
      between(expenses.expenseDate, lo, hi),
      ne(expenses.id, expenseId),
      // Only consider rows that are not already marked as duplicates. This
      // prevents chains/cycles (A -> B, then B -> A) and is the predicate a
      // partial index declared `WHERE duplicate_of IS NULL` needs in the
      // query before the planner may use it.
      sql`${expenses.duplicateOf} is null`,
    ),
    limit: 5,
  });

  return matches.map((m) => ({
    candidateId: m.id,
    confidence: dayDiff(m.expenseDate, expenseDate) === 0 ? 1.0 : 0.85,
  }));
}
|
||||
|
||||
/**
 * Absolute distance between two instants, expressed in days and rounded to
 * the nearest whole day.
 */
function dayDiff(a: Date, b: Date): number {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const elapsedMs = a.getTime() - b.getTime();
  return Math.round(Math.abs(elapsedMs) / MS_PER_DAY);
}
|
||||
|
||||
/**
 * Run the duplicate scan for one expense and record the outcome on its row.
 *
 * `dedupScannedAt` is always stamped with the database's `now()`. When the
 * scan yields candidates, `duplicateOf` is set to the one with the highest
 * confidence (the earliest candidate wins a tie).
 *
 * @param expenseId - ID of the expense to scan and update.
 * @returns The ID of the chosen candidate, or `null` when none was found.
 */
export async function markBestDuplicate(expenseId: string): Promise<string | null> {
  const [first, ...rest] = await scanForDuplicates(expenseId);

  // No candidates: only record that the scan happened.
  if (first === undefined) {
    await db
      .update(expenses)
      .set({ dedupScannedAt: sql`now()` })
      .where(eq(expenses.id, expenseId));
    return null;
  }

  // Keep the current leader unless a later candidate scores strictly higher.
  let top = first;
  for (const candidate of rest) {
    top = top.confidence >= candidate.confidence ? top : candidate;
  }

  await db
    .update(expenses)
    .set({ duplicateOf: top.candidateId, dedupScannedAt: sql`now()` })
    .where(eq(expenses.id, expenseId));
  return top.candidateId;
}
|
||||
Reference in New Issue
Block a user