Compare commits
4 Commits
feat/mobil
...
feat/dedup
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7574c3b575 | ||
|
|
4bcc7f8be6 | ||
|
|
18e5c124b0 | ||
|
|
8b077e1999 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -34,3 +34,4 @@ docker-compose.override.yml
|
|||||||
|
|
||||||
# Mobile audit screenshots — generated locally, regenerable
|
# Mobile audit screenshots — generated locally, regenerable
|
||||||
/.audit/
|
/.audit/
|
||||||
|
.migration/
|
||||||
|
|||||||
144
scripts/migrate-from-nocodb.ts
Normal file
144
scripts/migrate-from-nocodb.ts
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
/**
|
||||||
|
* One-shot migration: legacy NocoDB Interests → new client/interest split.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
*
|
||||||
|
* pnpm tsx scripts/migrate-from-nocodb.ts --dry-run
|
||||||
|
* Pulls the live NocoDB base, runs the transform + dedup pipeline,
|
||||||
|
* writes a report to .migration/<timestamp>/. NO database writes.
|
||||||
|
*
|
||||||
|
* pnpm tsx scripts/migrate-from-nocodb.ts --dry-run --port-slug harbor-royale
|
||||||
|
* Same, but tags the planned writes with the named port (matters for
|
||||||
|
* the apply phase — every client/interest belongs to one port).
|
||||||
|
*
|
||||||
|
* pnpm tsx scripts/migrate-from-nocodb.ts --apply --report .migration/<dir>/
|
||||||
|
* [Not yet implemented — apply phase comes in a follow-up PR.]
|
||||||
|
*
|
||||||
|
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §9.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'dotenv/config';
|
||||||
|
|
||||||
|
import path from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
|
||||||
|
import { fetchSnapshot, loadNocoDbConfig } from '@/lib/dedup/nocodb-source';
|
||||||
|
import { transformSnapshot } from '@/lib/dedup/migration-transform';
|
||||||
|
import { resolveReportPaths, writeReport } from '@/lib/dedup/migration-report';
|
||||||
|
|
||||||
|
interface CliArgs {
|
||||||
|
dryRun: boolean;
|
||||||
|
apply: boolean;
|
||||||
|
portSlug: string | null;
|
||||||
|
reportDir: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseArgs(argv: string[]): CliArgs {
|
||||||
|
const args: CliArgs = {
|
||||||
|
dryRun: false,
|
||||||
|
apply: false,
|
||||||
|
portSlug: null,
|
||||||
|
reportDir: null,
|
||||||
|
};
|
||||||
|
for (let i = 0; i < argv.length; i += 1) {
|
||||||
|
const a = argv[i]!;
|
||||||
|
if (a === '--dry-run') args.dryRun = true;
|
||||||
|
else if (a === '--apply') args.apply = true;
|
||||||
|
else if (a === '--port-slug') args.portSlug = argv[++i] ?? null;
|
||||||
|
else if (a === '--report') args.reportDir = argv[++i] ?? null;
|
||||||
|
else if (a === '-h' || a === '--help') {
|
||||||
|
printHelp();
|
||||||
|
process.exit(0);
|
||||||
|
} else {
|
||||||
|
console.error(`Unknown argument: ${a}`);
|
||||||
|
printHelp();
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return args;
|
||||||
|
}
|
||||||
|
|
||||||
|
function printHelp(): void {
|
||||||
|
console.log(`Usage:
|
||||||
|
pnpm tsx scripts/migrate-from-nocodb.ts --dry-run [--port-slug <slug>]
|
||||||
|
Pulls NocoDB → transforms → writes report to .migration/<timestamp>/.
|
||||||
|
No database writes.
|
||||||
|
|
||||||
|
pnpm tsx scripts/migrate-from-nocodb.ts --apply --report .migration/<dir>/
|
||||||
|
Apply phase. (Not yet implemented.)
|
||||||
|
|
||||||
|
Flags:
|
||||||
|
--dry-run Read NocoDB, write report only.
|
||||||
|
--apply Actually write to the new DB. (Not yet supported.)
|
||||||
|
--port-slug <slug> Port slug to attach to all imported entities.
|
||||||
|
Defaults to the first available port if omitted.
|
||||||
|
--report <dir> Path to a previously-generated report dir
|
||||||
|
(only used by --apply).
|
||||||
|
-h, --help Show this help.
|
||||||
|
`);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main(): Promise<void> {
|
||||||
|
const args = parseArgs(process.argv.slice(2));
|
||||||
|
|
||||||
|
if (!args.dryRun && !args.apply) {
|
||||||
|
console.error('Must specify --dry-run or --apply');
|
||||||
|
printHelp();
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.apply) {
|
||||||
|
console.error('--apply is not yet implemented in this version. P3 ships dry-run first.');
|
||||||
|
console.error('See docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §9.2.');
|
||||||
|
process.exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Dry-run path ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
console.log('[migrate] Loading NocoDB config…');
|
||||||
|
const config = loadNocoDbConfig();
|
||||||
|
console.log(`[migrate] Source: ${config.url}`);
|
||||||
|
|
||||||
|
console.log('[migrate] Fetching snapshot from NocoDB…');
|
||||||
|
const start = Date.now();
|
||||||
|
const snapshot = await fetchSnapshot(config);
|
||||||
|
const elapsed = ((Date.now() - start) / 1000).toFixed(1);
|
||||||
|
console.log(
|
||||||
|
`[migrate] Snapshot fetched in ${elapsed}s — ${snapshot.interests.length} interests, ${snapshot.residentialInterests.length} residential, ${snapshot.berths.length} berths.`,
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log('[migrate] Running transform + dedup pipeline…');
|
||||||
|
const plan = transformSnapshot(snapshot);
|
||||||
|
|
||||||
|
// Resolve output paths relative to the worktree root (the script itself
|
||||||
|
// lives in scripts/; we want the .migration dir at the repo root).
|
||||||
|
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
|
||||||
|
const repoRoot = path.resolve(scriptDir, '..');
|
||||||
|
const generatedAt = new Date().toISOString();
|
||||||
|
const paths = resolveReportPaths(repoRoot);
|
||||||
|
|
||||||
|
console.log(`[migrate] Writing report to ${paths.rootDir}…`);
|
||||||
|
await writeReport(paths, plan, generatedAt);
|
||||||
|
|
||||||
|
// ── Console summary ──────────────────────────────────────────────────────
|
||||||
|
const s = plan.stats;
|
||||||
|
console.log('');
|
||||||
|
console.log('=== Migration Plan Summary ===');
|
||||||
|
console.log(
|
||||||
|
` Input: ${s.inputInterestRows} interests, ${s.inputResidentialRows} residential interests`,
|
||||||
|
);
|
||||||
|
console.log(` Output: ${s.outputClients} clients, ${s.outputInterests} interests`);
|
||||||
|
console.log(` ${s.outputContacts} contacts, ${s.outputAddresses} addresses`);
|
||||||
|
console.log(
|
||||||
|
` Dedup: ${s.autoLinkedClusters} auto-linked clusters, ${s.needsReviewPairs} pairs flagged for review`,
|
||||||
|
);
|
||||||
|
console.log(` Quality: ${s.flaggedRows} rows flagged (see report.csv)`);
|
||||||
|
console.log('');
|
||||||
|
console.log(` Full report: ${paths.summaryPath}`);
|
||||||
|
console.log('');
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch((err) => {
|
||||||
|
console.error('[migrate] Fatal error:', err);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
5
src/app/(dashboard)/[portSlug]/admin/duplicates/page.tsx
Normal file
5
src/app/(dashboard)/[portSlug]/admin/duplicates/page.tsx
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
import { DuplicatesReviewQueue } from '@/components/admin/duplicates/duplicates-review-queue';
|
||||||
|
|
||||||
|
/**
 * Admin page for the duplicate-client review queue.
 * Thin server-side wrapper — all state and fetching live in the client component.
 */
export default function DuplicatesAdminPage() {
  return <DuplicatesReviewQueue />;
}
|
||||||
4
src/app/api/v1/admin/duplicates/[id]/dismiss/route.ts
Normal file
4
src/app/api/v1/admin/duplicates/[id]/dismiss/route.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||||
|
import { dismissHandler } from '../../handlers';
|
||||||
|
|
||||||
|
/** POST /api/v1/admin/duplicates/[id]/dismiss — requires the clients:edit permission. */
export const POST = withAuth(withPermission('clients', 'edit', dismissHandler));
|
||||||
4
src/app/api/v1/admin/duplicates/[id]/merge/route.ts
Normal file
4
src/app/api/v1/admin/duplicates/[id]/merge/route.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||||
|
import { confirmMergeHandler } from '../../handlers';
|
||||||
|
|
||||||
|
/** POST /api/v1/admin/duplicates/[id]/merge — requires the clients:edit permission. */
export const POST = withAuth(withPermission('clients', 'edit', confirmMergeHandler));
|
||||||
160
src/app/api/v1/admin/duplicates/handlers.ts
Normal file
160
src/app/api/v1/admin/duplicates/handlers.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import { NextResponse } from 'next/server';
|
||||||
|
import { and, eq, inArray } from 'drizzle-orm';
|
||||||
|
|
||||||
|
import type { AuthContext } from '@/lib/api/helpers';
|
||||||
|
import { db } from '@/lib/db';
|
||||||
|
import { clients, clientMergeCandidates } from '@/lib/db/schema/clients';
|
||||||
|
import { errorResponse, NotFoundError } from '@/lib/errors';
|
||||||
|
import {
|
||||||
|
listPendingMergeCandidates,
|
||||||
|
mergeClients,
|
||||||
|
type MergeFieldChoices,
|
||||||
|
} from '@/lib/services/client-merge.service';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/v1/admin/duplicates
|
||||||
|
*
|
||||||
|
* Pending merge candidates for the current port, sorted by score.
|
||||||
|
* Each row hydrates its two client summaries so the review-queue UI
|
||||||
|
* can render side-by-side cards without an N+1 fetch.
|
||||||
|
*/
|
||||||
|
export async function listHandler(_req: Request, ctx: AuthContext): Promise<NextResponse> {
|
||||||
|
try {
|
||||||
|
const pairs = await listPendingMergeCandidates(ctx.portId);
|
||||||
|
if (pairs.length === 0) return NextResponse.json({ data: [] });
|
||||||
|
|
||||||
|
const ids = Array.from(new Set(pairs.flatMap((p) => [p.clientAId, p.clientBId])));
|
||||||
|
const clientRows = await db
|
||||||
|
.select({
|
||||||
|
id: clients.id,
|
||||||
|
fullName: clients.fullName,
|
||||||
|
archivedAt: clients.archivedAt,
|
||||||
|
mergedIntoClientId: clients.mergedIntoClientId,
|
||||||
|
createdAt: clients.createdAt,
|
||||||
|
})
|
||||||
|
.from(clients)
|
||||||
|
.where(inArray(clients.id, ids));
|
||||||
|
const clientById = new Map(clientRows.map((c) => [c.id, c]));
|
||||||
|
|
||||||
|
const data = pairs
|
||||||
|
.map((p) => {
|
||||||
|
const a = clientById.get(p.clientAId);
|
||||||
|
const b = clientById.get(p.clientBId);
|
||||||
|
if (!a || !b) return null; // FK orphan — shouldn't happen, but be defensive
|
||||||
|
// Skip pairs where one side has already been merged or archived.
|
||||||
|
if (a.mergedIntoClientId || b.mergedIntoClientId) return null;
|
||||||
|
return {
|
||||||
|
id: p.id,
|
||||||
|
score: p.score,
|
||||||
|
reasons: p.reasons,
|
||||||
|
createdAt: p.createdAt,
|
||||||
|
clientA: { id: a.id, fullName: a.fullName, createdAt: a.createdAt },
|
||||||
|
clientB: { id: b.id, fullName: b.fullName, createdAt: b.createdAt },
|
||||||
|
};
|
||||||
|
})
|
||||||
|
.filter((row): row is NonNullable<typeof row> => row !== null);
|
||||||
|
|
||||||
|
return NextResponse.json({ data });
|
||||||
|
} catch (error) {
|
||||||
|
return errorResponse(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/v1/admin/duplicates/[id]/merge
|
||||||
|
*
|
||||||
|
* Body: { winnerId: string, fieldChoices?: MergeFieldChoices }
|
||||||
|
*
|
||||||
|
* Confirms a merge candidate. The winner is the one the user picked
|
||||||
|
* to keep; the other side becomes the loser. Calls into the merge
|
||||||
|
* service which is the only path that touches client_merge_log.
|
||||||
|
*/
|
||||||
|
export async function confirmMergeHandler(
|
||||||
|
req: Request,
|
||||||
|
ctx: AuthContext,
|
||||||
|
params: { id?: string },
|
||||||
|
): Promise<NextResponse> {
|
||||||
|
try {
|
||||||
|
const id = params.id ?? '';
|
||||||
|
const body = (await req.json().catch(() => ({}))) as {
|
||||||
|
winnerId?: string;
|
||||||
|
fieldChoices?: MergeFieldChoices;
|
||||||
|
};
|
||||||
|
if (!body.winnerId) {
|
||||||
|
return NextResponse.json({ error: 'winnerId required' }, { status: 400 });
|
||||||
|
}
|
||||||
|
|
||||||
|
const [candidate] = await db
|
||||||
|
.select()
|
||||||
|
.from(clientMergeCandidates)
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(clientMergeCandidates.id, id),
|
||||||
|
eq(clientMergeCandidates.portId, ctx.portId),
|
||||||
|
eq(clientMergeCandidates.status, 'pending'),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
if (!candidate) throw new NotFoundError('Merge candidate');
|
||||||
|
|
||||||
|
const loserId =
|
||||||
|
body.winnerId === candidate.clientAId
|
||||||
|
? candidate.clientBId
|
||||||
|
: body.winnerId === candidate.clientBId
|
||||||
|
? candidate.clientAId
|
||||||
|
: null;
|
||||||
|
if (!loserId) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: 'winnerId must match one of the candidate clients' },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await mergeClients({
|
||||||
|
winnerId: body.winnerId,
|
||||||
|
loserId,
|
||||||
|
mergedBy: ctx.userId,
|
||||||
|
fieldChoices: body.fieldChoices,
|
||||||
|
});
|
||||||
|
|
||||||
|
return NextResponse.json({ data: result });
|
||||||
|
} catch (error) {
|
||||||
|
return errorResponse(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/v1/admin/duplicates/[id]/dismiss
|
||||||
|
*
|
||||||
|
* Mark a merge candidate as dismissed. The background scoring job
|
||||||
|
* skips dismissed pairs on subsequent runs (a future score increase
|
||||||
|
* can re-create them).
|
||||||
|
*/
|
||||||
|
export async function dismissHandler(
|
||||||
|
_req: Request,
|
||||||
|
ctx: AuthContext,
|
||||||
|
params: { id?: string },
|
||||||
|
): Promise<NextResponse> {
|
||||||
|
try {
|
||||||
|
const id = params.id ?? '';
|
||||||
|
const result = await db
|
||||||
|
.update(clientMergeCandidates)
|
||||||
|
.set({
|
||||||
|
status: 'dismissed',
|
||||||
|
resolvedAt: new Date(),
|
||||||
|
resolvedBy: ctx.userId,
|
||||||
|
})
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(clientMergeCandidates.id, id),
|
||||||
|
eq(clientMergeCandidates.portId, ctx.portId),
|
||||||
|
eq(clientMergeCandidates.status, 'pending'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.returning({ id: clientMergeCandidates.id });
|
||||||
|
|
||||||
|
if (result.length === 0) throw new NotFoundError('Merge candidate');
|
||||||
|
return NextResponse.json({ data: { id: result[0]!.id, status: 'dismissed' } });
|
||||||
|
} catch (error) {
|
||||||
|
return errorResponse(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
4
src/app/api/v1/admin/duplicates/route.ts
Normal file
4
src/app/api/v1/admin/duplicates/route.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||||
|
import { listHandler } from './handlers';
|
||||||
|
|
||||||
|
/** GET /api/v1/admin/duplicates — requires the clients:view permission. */
export const GET = withAuth(withPermission('clients', 'view', listHandler));
|
||||||
160
src/app/api/v1/clients/match-candidates/handlers.ts
Normal file
160
src/app/api/v1/clients/match-candidates/handlers.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import { NextResponse } from 'next/server';
|
||||||
|
import { and, eq, inArray } from 'drizzle-orm';
|
||||||
|
|
||||||
|
import type { AuthContext } from '@/lib/api/helpers';
|
||||||
|
import { db } from '@/lib/db';
|
||||||
|
import { clients, clientContacts } from '@/lib/db/schema/clients';
|
||||||
|
import { interests } from '@/lib/db/schema/interests';
|
||||||
|
import { errorResponse } from '@/lib/errors';
|
||||||
|
import { findClientMatches, type MatchCandidate } from '@/lib/dedup/find-matches';
|
||||||
|
import { normalizeEmail, normalizeName, normalizePhone } from '@/lib/dedup/normalize';
|
||||||
|
import type { CountryCode } from '@/lib/i18n/countries';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/v1/clients/match-candidates
|
||||||
|
*
|
||||||
|
* Query parameters (any combination):
|
||||||
|
* email Free-text email; gets normalized server-side.
|
||||||
|
* phone Free-text phone; gets normalized to E.164 server-side.
|
||||||
|
* name Free-text full name; used for surname-token blocking.
|
||||||
|
* country Optional ISO country hint (default: AI for Port Nimara).
|
||||||
|
*
|
||||||
|
* Returns the top candidates that scored above the soft-warn threshold,
|
||||||
|
* each with a small client summary the form's suggestion card can
|
||||||
|
* render. Confidence tiers and rules are applied server-side from the
|
||||||
|
* port's `system_settings` (when wired) or sensible defaults otherwise.
|
||||||
|
*
|
||||||
|
* Used by `useDedupSuggestion` in the new-client form. Debounced on
|
||||||
|
* the client; this endpoint must be cheap (single port pool fetch +
|
||||||
|
* an in-memory dedup pass).
|
||||||
|
*/
|
||||||
|
export async function getMatchCandidatesHandler(
|
||||||
|
req: Request,
|
||||||
|
ctx: AuthContext,
|
||||||
|
): Promise<NextResponse> {
|
||||||
|
try {
|
||||||
|
const url = new URL(req.url);
|
||||||
|
const rawEmail = url.searchParams.get('email');
|
||||||
|
const rawPhone = url.searchParams.get('phone');
|
||||||
|
const rawName = url.searchParams.get('name');
|
||||||
|
const country = (url.searchParams.get('country') ?? 'AI') as CountryCode;
|
||||||
|
|
||||||
|
const email = rawEmail ? normalizeEmail(rawEmail) : null;
|
||||||
|
const phoneResult = rawPhone ? normalizePhone(rawPhone, country) : null;
|
||||||
|
const nameResult = rawName ? normalizeName(rawName) : null;
|
||||||
|
|
||||||
|
// If the caller didn't give us anything useful to match on, return empty
|
||||||
|
// — short-circuit rather than scan every client for nothing.
|
||||||
|
if (!email && !phoneResult?.e164 && !nameResult?.surnameToken) {
|
||||||
|
return NextResponse.json({ data: [] });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the input candidate.
|
||||||
|
const input: MatchCandidate = {
|
||||||
|
id: '__incoming__',
|
||||||
|
fullName: nameResult?.display ?? null,
|
||||||
|
surnameToken: nameResult?.surnameToken ?? null,
|
||||||
|
emails: email ? [email] : [],
|
||||||
|
phonesE164: phoneResult?.e164 ? [phoneResult.e164] : [],
|
||||||
|
countryIso: country,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fetch the live pool for this port. We keep this O(N) over clients
|
||||||
|
// since the dedup library does its own blocking; for ports with
|
||||||
|
// thousands of clients we can later restrict by surname-token /
|
||||||
|
// contact lookups, but for current scale the simple full-pool fetch
|
||||||
|
// is fine.
|
||||||
|
const liveClients = await db
|
||||||
|
.select({
|
||||||
|
id: clients.id,
|
||||||
|
fullName: clients.fullName,
|
||||||
|
nationalityIso: clients.nationalityIso,
|
||||||
|
})
|
||||||
|
.from(clients)
|
||||||
|
.where(and(eq(clients.portId, ctx.portId)));
|
||||||
|
|
||||||
|
if (liveClients.length === 0) {
|
||||||
|
return NextResponse.json({ data: [] });
|
||||||
|
}
|
||||||
|
|
||||||
|
const clientIds = liveClients.map((c) => c.id);
|
||||||
|
const contactRows = await db
|
||||||
|
.select({
|
||||||
|
clientId: clientContacts.clientId,
|
||||||
|
channel: clientContacts.channel,
|
||||||
|
value: clientContacts.value,
|
||||||
|
valueE164: clientContacts.valueE164,
|
||||||
|
})
|
||||||
|
.from(clientContacts)
|
||||||
|
.where(inArray(clientContacts.clientId, clientIds));
|
||||||
|
|
||||||
|
// Group contacts by client for the candidate map.
|
||||||
|
const emailsByClient = new Map<string, string[]>();
|
||||||
|
const phonesByClient = new Map<string, string[]>();
|
||||||
|
for (const c of contactRows) {
|
||||||
|
if (c.channel === 'email') {
|
||||||
|
const arr = emailsByClient.get(c.clientId) ?? [];
|
||||||
|
arr.push(c.value.toLowerCase());
|
||||||
|
emailsByClient.set(c.clientId, arr);
|
||||||
|
} else if (c.channel === 'phone' || c.channel === 'whatsapp') {
|
||||||
|
if (c.valueE164) {
|
||||||
|
const arr = phonesByClient.get(c.clientId) ?? [];
|
||||||
|
arr.push(c.valueE164);
|
||||||
|
phonesByClient.set(c.clientId, arr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const pool: MatchCandidate[] = liveClients.map((c) => {
|
||||||
|
const named = normalizeName(c.fullName);
|
||||||
|
return {
|
||||||
|
id: c.id,
|
||||||
|
fullName: c.fullName,
|
||||||
|
surnameToken: named.surnameToken ?? null,
|
||||||
|
emails: emailsByClient.get(c.id) ?? [],
|
||||||
|
phonesE164: phonesByClient.get(c.id) ?? [],
|
||||||
|
countryIso: (c.nationalityIso as CountryCode | null) ?? null,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
const matches = findClientMatches(input, pool, {
|
||||||
|
highScore: 90,
|
||||||
|
mediumScore: 50,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Only return medium+ — low-confidence noise isn't useful at the
|
||||||
|
// create-form layer (background scoring queue picks those up).
|
||||||
|
const useful = matches.filter((m) => m.confidence !== 'low');
|
||||||
|
if (useful.length === 0) {
|
||||||
|
return NextResponse.json({ data: [] });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pull a quick summary for each surfaced candidate so the suggestion
|
||||||
|
// card has enough to render ("Marcus Laurent · 2 interests · last
|
||||||
|
// contact 9d ago").
|
||||||
|
const summarizedIds = useful.map((m) => m.candidate.id);
|
||||||
|
const interestCounts = await db
|
||||||
|
.select({ clientId: interests.clientId })
|
||||||
|
.from(interests)
|
||||||
|
.where(inArray(interests.clientId, summarizedIds));
|
||||||
|
const interestsByClient = new Map<string, number>();
|
||||||
|
for (const r of interestCounts) {
|
||||||
|
interestsByClient.set(r.clientId, (interestsByClient.get(r.clientId) ?? 0) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = useful.map((m) => ({
|
||||||
|
clientId: m.candidate.id,
|
||||||
|
fullName: m.candidate.fullName,
|
||||||
|
score: m.score,
|
||||||
|
confidence: m.confidence,
|
||||||
|
reasons: m.reasons,
|
||||||
|
interestCount: interestsByClient.get(m.candidate.id) ?? 0,
|
||||||
|
emails: m.candidate.emails,
|
||||||
|
phonesE164: m.candidate.phonesE164,
|
||||||
|
}));
|
||||||
|
|
||||||
|
return NextResponse.json({ data });
|
||||||
|
} catch (error) {
|
||||||
|
return errorResponse(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
4
src/app/api/v1/clients/match-candidates/route.ts
Normal file
4
src/app/api/v1/clients/match-candidates/route.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||||
|
import { getMatchCandidatesHandler } from './handlers';
|
||||||
|
|
||||||
|
/** GET /api/v1/clients/match-candidates — requires the clients:view permission. */
export const GET = withAuth(withPermission('clients', 'view', getMatchCandidatesHandler));
|
||||||
215
src/components/admin/duplicates/duplicates-review-queue.tsx
Normal file
215
src/components/admin/duplicates/duplicates-review-queue.tsx
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
'use client';
|
||||||
|
|
||||||
|
import { useState } from 'react';
|
||||||
|
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
|
||||||
|
import { ArrowRight, GitMerge, X } from 'lucide-react';
|
||||||
|
import { toast } from 'sonner';
|
||||||
|
|
||||||
|
import { Button } from '@/components/ui/button';
|
||||||
|
import { PageHeader } from '@/components/shared/page-header';
|
||||||
|
import { EmptyState } from '@/components/shared/empty-state';
|
||||||
|
import { Skeleton } from '@/components/ui/skeleton';
|
||||||
|
import { apiFetch } from '@/lib/api/client';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
|
||||||
|
interface CandidatePair {
|
||||||
|
id: string;
|
||||||
|
score: number;
|
||||||
|
reasons: string[];
|
||||||
|
createdAt: string;
|
||||||
|
clientA: { id: string; fullName: string; createdAt: string };
|
||||||
|
clientB: { id: string; fullName: string; createdAt: string };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Admin review queue for the dedup background scoring job.
 *
 * Lists every pending merge candidate (pairs where score >=
 * `dedup_review_queue_threshold`). For each pair the admin can:
 * - Pick a winner via the side-by-side card → confirms a merge
 * - Dismiss → removes from the queue (a future score increase
 *   re-creates the pair on the next scoring run)
 *
 * Only minimal merge UI here: the user picks which side is the winner
 * (no per-field choice), and the loser archives. A richer side-by-side
 * field-merge dialog is a future enhancement.
 */
export function DuplicatesReviewQueue() {
  const queryClient = useQueryClient();

  const { data, isLoading } = useQuery<{ data: CandidatePair[] }>({
    queryKey: ['admin', 'duplicates'],
    queryFn: () => apiFetch<{ data: CandidatePair[] }>('/api/v1/admin/duplicates'),
  });

  const pairs = data?.data ?? [];

  return (
    <div className="space-y-4">
      <PageHeader
        title="Duplicate clients"
        description={
          // Bug fix: don't claim "No pending pairs" while the query is
          // still in flight — show a loading description instead.
          isLoading
            ? 'Loading…'
            : pairs.length === 0
              ? 'No pending pairs to review.'
              : `${pairs.length} pair${pairs.length === 1 ? '' : 's'} flagged for review.`
        }
      />

      {isLoading ? (
        <div className="space-y-3">
          {[0, 1, 2].map((i) => (
            <Skeleton key={i} className="h-32 w-full" />
          ))}
        </div>
      ) : pairs.length === 0 ? (
        <EmptyState
          title="All clear"
          description="The background scoring job hasn't surfaced any potential duplicates yet."
        />
      ) : (
        <ul className="space-y-3">
          {pairs.map((pair) => (
            <li key={pair.id}>
              <CandidateRow pair={pair} queryClient={queryClient} />
            </li>
          ))}
        </ul>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
|
/**
 * One pending duplicate pair: side-by-side client cards plus
 * merge/dismiss actions. The selected card is the merge "winner"
 * (the record that is kept).
 */
function CandidateRow({
  pair,
  queryClient,
}: {
  pair: CandidatePair;
  queryClient: ReturnType<typeof useQueryClient>;
}) {
  // Which action is in flight — both buttons disable while either runs.
  const [pendingAction, setPendingAction] = useState<'merge' | 'dismiss' | null>(null);
  // Currently selected winner; defaults to the A side.
  const [keepId, setKeepId] = useState<string>(pair.clientA.id);

  const mergeMutation = useMutation({
    mutationFn: () =>
      apiFetch(`/api/v1/admin/duplicates/${pair.id}/merge`, {
        method: 'POST',
        body: { winnerId: keepId },
      }),
    onSuccess: () => {
      const keptA = keepId === pair.clientA.id;
      const winnerName = keptA ? pair.clientA.fullName : pair.clientB.fullName;
      const loserName = keptA ? pair.clientB.fullName : pair.clientA.fullName;
      toast.success(`Merged "${loserName}" into "${winnerName}"`);
      queryClient.invalidateQueries({ queryKey: ['admin', 'duplicates'] });
      queryClient.invalidateQueries({ queryKey: ['clients'] });
    },
    onError: (err) => toast.error(err instanceof Error ? err.message : 'Merge failed'),
    onSettled: () => setPendingAction(null),
  });

  const dismissMutation = useMutation({
    mutationFn: () => apiFetch(`/api/v1/admin/duplicates/${pair.id}/dismiss`, { method: 'POST' }),
    onSuccess: () => {
      toast.message('Dismissed');
      queryClient.invalidateQueries({ queryKey: ['admin', 'duplicates'] });
    },
    onError: (err) => toast.error(err instanceof Error ? err.message : 'Dismiss failed'),
    onSettled: () => setPendingAction(null),
  });

  return (
    <div className="rounded-lg border bg-card p-4">
      {/* Score badge + match reasons on the left, flagged date on the right. */}
      <div className="mb-3 flex items-baseline justify-between gap-3">
        <div>
          <span className="rounded-full bg-muted px-2 py-0.5 text-[10px] font-medium uppercase tracking-wide text-muted-foreground">
            score {pair.score}
          </span>{' '}
          <span className="text-xs text-muted-foreground">{pair.reasons.join(' · ')}</span>
        </div>
        <span className="text-xs text-muted-foreground">
          flagged {new Date(pair.createdAt).toLocaleDateString()}
        </span>
      </div>

      {/* Side-by-side selectable cards with an arrow spacer between. */}
      <div className="grid gap-3 sm:grid-cols-[1fr_auto_1fr]">
        <ClientCard
          client={pair.clientA}
          isSelected={keepId === pair.clientA.id}
          onSelect={() => setKeepId(pair.clientA.id)}
        />
        <div className="flex items-center justify-center text-muted-foreground">
          <ArrowRight className="size-4" aria-hidden />
        </div>
        <ClientCard
          client={pair.clientB}
          isSelected={keepId === pair.clientB.id}
          onSelect={() => setKeepId(pair.clientB.id)}
        />
      </div>

      <div className="mt-3 flex flex-wrap items-center gap-2">
        <Button
          size="sm"
          onClick={() => {
            setPendingAction('merge');
            mergeMutation.mutate();
          }}
          disabled={pendingAction !== null}
        >
          <GitMerge className="mr-1 size-3.5" aria-hidden />
          Merge into selected
        </Button>
        <Button
          size="sm"
          variant="ghost"
          onClick={() => {
            setPendingAction('dismiss');
            dismissMutation.mutate();
          }}
          disabled={pendingAction !== null}
        >
          <X className="mr-1 size-3.5" aria-hidden />
          Dismiss
        </Button>
        <p className="text-xs text-muted-foreground">
          The unselected card becomes the loser; its interests + contacts move to the selected
          client and the original is archived.
        </p>
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
/**
 * Selectable summary card for one side of a duplicate pair.
 * The selected card is rendered with a "KEEP" badge and primary ring.
 */
function ClientCard({
  client,
  isSelected,
  onSelect,
}: {
  client: CandidatePair['clientA'];
  isSelected: boolean;
  onSelect: () => void;
}) {
  const cardClass = cn(
    'rounded-md border p-3 text-left transition-colors',
    isSelected
      ? 'border-primary bg-primary/5 ring-1 ring-primary/30'
      : 'border-border hover:bg-muted/40',
  );

  return (
    <button type="button" onClick={onSelect} className={cardClass}>
      <p className="text-sm font-medium">{client.fullName}</p>
      <p className="mt-0.5 text-[11px] text-muted-foreground">
        Created {new Date(client.createdAt).toLocaleDateString()}
      </p>
      {isSelected && (
        <span className="mt-1 inline-block rounded-full bg-primary/10 px-1.5 py-0.5 text-[10px] font-semibold text-primary">
          KEEP
        </span>
      )}
    </button>
  );
}
|
||||||
@@ -167,10 +167,7 @@ export function BerthDetailHeader({ berth }: BerthDetailHeaderProps) {
|
|||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<DetailHeaderStrip>
|
<DetailHeaderStrip>
|
||||||
{/* Stacks vertically on phone widths so the action buttons don't
|
<div className="flex items-start gap-4">
|
||||||
squeeze the area subtitle into a two-line wrap. From sm up the
|
|
||||||
title/area block sits side-by-side with the action buttons. */}
|
|
||||||
<div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:gap-4">
|
|
||||||
<div className="flex-1 min-w-0">
|
<div className="flex-1 min-w-0">
|
||||||
<div className="flex items-center gap-3 flex-wrap">
|
<div className="flex items-center gap-3 flex-wrap">
|
||||||
<h1 className="hidden sm:block text-2xl font-bold text-foreground">
|
<h1 className="hidden sm:block text-2xl font-bold text-foreground">
|
||||||
@@ -185,7 +182,7 @@ export function BerthDetailHeader({ berth }: BerthDetailHeaderProps) {
|
|||||||
{berth.area && <p className="text-muted-foreground mt-1">{berth.area}</p>}
|
{berth.area && <p className="text-muted-foreground mt-1">{berth.area}</p>}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex flex-wrap items-center gap-2 sm:shrink-0">
|
<div className="flex flex-wrap items-center gap-2 shrink-0">
|
||||||
<PermissionGate resource="berths" action="edit">
|
<PermissionGate resource="berths" action="edit">
|
||||||
<Button variant="outline" size="sm" onClick={() => setStatusOpen(true)}>
|
<Button variant="outline" size="sm" onClick={() => setStatusOpen(true)}>
|
||||||
<RefreshCw className="mr-1.5 h-4 w-4" />
|
<RefreshCw className="mr-1.5 h-4 w-4" />
|
||||||
|
|||||||
@@ -48,13 +48,10 @@ type BerthData = {
|
|||||||
|
|
||||||
function SpecRow({ label, value }: { label: string; value: React.ReactNode }) {
|
function SpecRow({ label, value }: { label: string; value: React.ReactNode }) {
|
||||||
if (!value && value !== 0 && value !== false) return null;
|
if (!value && value !== 0 && value !== false) return null;
|
||||||
// Mobile-first: stack vertically with label on top so long values
|
|
||||||
// (e.g. "206.69 ft / 62.99 m") never clip at the right edge.
|
|
||||||
// From `sm` (>=640px) up: switch to the original two-column layout.
|
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col gap-0.5 py-2 text-sm sm:flex-row sm:items-baseline sm:justify-between sm:gap-3">
|
<div className="flex justify-between py-2 text-sm">
|
||||||
<span className="text-muted-foreground">{label}</span>
|
<span className="text-muted-foreground">{label}</span>
|
||||||
<span className="font-medium sm:max-w-[60%] sm:text-right">{value}</span>
|
<span className="font-medium text-right max-w-[60%]">{value}</span>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import { TagPicker } from '@/components/shared/tag-picker';
|
|||||||
import { CountryCombobox } from '@/components/shared/country-combobox';
|
import { CountryCombobox } from '@/components/shared/country-combobox';
|
||||||
import { TimezoneCombobox } from '@/components/shared/timezone-combobox';
|
import { TimezoneCombobox } from '@/components/shared/timezone-combobox';
|
||||||
import { PhoneInput } from '@/components/shared/phone-input';
|
import { PhoneInput } from '@/components/shared/phone-input';
|
||||||
|
import { DedupSuggestionPanel } from '@/components/clients/dedup-suggestion-panel';
|
||||||
import { apiFetch } from '@/lib/api/client';
|
import { apiFetch } from '@/lib/api/client';
|
||||||
import { createClientSchema, type CreateClientInput } from '@/lib/validators/clients';
|
import { createClientSchema, type CreateClientInput } from '@/lib/validators/clients';
|
||||||
import type { CountryCode } from '@/lib/i18n/countries';
|
import type { CountryCode } from '@/lib/i18n/countries';
|
||||||
@@ -30,6 +31,12 @@ import type { CountryCode } from '@/lib/i18n/countries';
|
|||||||
interface ClientFormProps {
|
interface ClientFormProps {
|
||||||
open: boolean;
|
open: boolean;
|
||||||
onOpenChange: (open: boolean) => void;
|
onOpenChange: (open: boolean) => void;
|
||||||
|
/** Optional callback fired when the dedup suggestion panel reports
|
||||||
|
* the user picked an existing client. The form closes; parent is
|
||||||
|
* responsible for navigating to the existing client's detail page
|
||||||
|
* or opening the create-interest dialog pre-filled with that
|
||||||
|
* clientId. Skipped in edit mode. */
|
||||||
|
onUseExistingClient?: (clientId: string) => void;
|
||||||
/** If provided, form is in edit mode */
|
/** If provided, form is in edit mode */
|
||||||
client?: {
|
client?: {
|
||||||
id: string;
|
id: string;
|
||||||
@@ -53,7 +60,7 @@ interface ClientFormProps {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export function ClientForm({ open, onOpenChange, client }: ClientFormProps) {
|
export function ClientForm({ open, onOpenChange, client, onUseExistingClient }: ClientFormProps) {
|
||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
const isEdit = !!client;
|
const isEdit = !!client;
|
||||||
|
|
||||||
@@ -143,6 +150,26 @@ export function ClientForm({ open, onOpenChange, client }: ClientFormProps) {
|
|||||||
</SheetHeader>
|
</SheetHeader>
|
||||||
|
|
||||||
<form onSubmit={handleSubmit((data) => mutation.mutate(data))} className="space-y-6 py-6">
|
<form onSubmit={handleSubmit((data) => mutation.mutate(data))} className="space-y-6 py-6">
|
||||||
|
{/* Dedup suggestion — only on the create path. Watches the
|
||||||
|
live form values for email / phone / name and surfaces
|
||||||
|
an existing client when one matches. The user can
|
||||||
|
attach the new interest to that client instead of
|
||||||
|
creating a duplicate. */}
|
||||||
|
{!isEdit ? (
|
||||||
|
<DedupSuggestionPanel
|
||||||
|
email={watch('contacts')?.find((c) => c?.channel === 'email')?.value ?? null}
|
||||||
|
phone={
|
||||||
|
watch('contacts')?.find((c) => c?.channel === 'phone' || c?.channel === 'whatsapp')
|
||||||
|
?.valueE164 ?? null
|
||||||
|
}
|
||||||
|
name={watch('fullName') ?? null}
|
||||||
|
onUseExisting={(match) => {
|
||||||
|
onUseExistingClient?.(match.clientId);
|
||||||
|
onOpenChange(false);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
) : null}
|
||||||
|
|
||||||
{/* Basic Info */}
|
{/* Basic Info */}
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<h3 className="text-sm font-medium text-muted-foreground uppercase tracking-wide">
|
<h3 className="text-sm font-medium text-muted-foreground uppercase tracking-wide">
|
||||||
|
|||||||
@@ -208,40 +208,32 @@ function ContactRow({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Right: tag + actions.
|
{/* Right: tag + actions */}
|
||||||
When the contact value is empty (e.g. a row created from a stale
|
|
||||||
import or an aborted edit), we hide the "Add tag" + Make-primary
|
|
||||||
controls so the empty placeholder doesn't clutter the row. The
|
|
||||||
trash icon is always shown so users can clean up the empty entry. */}
|
|
||||||
<div className="flex items-center gap-2 shrink-0">
|
<div className="flex items-center gap-2 shrink-0">
|
||||||
{contact.value ? (
|
<div className="w-28 text-xs text-muted-foreground text-right">
|
||||||
<>
|
<InlineEditableField
|
||||||
<div className="w-28 text-xs text-muted-foreground text-right">
|
value={
|
||||||
<InlineEditableField
|
contact.label && contact.label.toLowerCase() !== 'primary' ? contact.label : null
|
||||||
value={
|
}
|
||||||
contact.label && contact.label.toLowerCase() !== 'primary' ? contact.label : null
|
emptyText="Add tag"
|
||||||
}
|
placeholder="work, home…"
|
||||||
emptyText="Add tag"
|
onSave={async (v) => {
|
||||||
placeholder="work, home…"
|
await onUpdate({ label: v });
|
||||||
onSave={async (v) => {
|
}}
|
||||||
await onUpdate({ label: v });
|
/>
|
||||||
}}
|
</div>
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
onClick={togglePrimary}
|
onClick={togglePrimary}
|
||||||
title={contact.isPrimary ? 'Primary' : 'Make primary'}
|
title={contact.isPrimary ? 'Primary' : 'Make primary'}
|
||||||
className={cn(
|
className={cn(
|
||||||
'p-1 rounded hover:bg-background/60 transition-colors',
|
'p-1 rounded hover:bg-background/60 transition-colors',
|
||||||
contact.isPrimary ? 'text-primary' : 'text-muted-foreground/50',
|
contact.isPrimary ? 'text-primary' : 'text-muted-foreground/50',
|
||||||
)}
|
)}
|
||||||
>
|
>
|
||||||
<Star className={cn('h-3.5 w-3.5', contact.isPrimary && 'fill-current')} />
|
<Star className={cn('h-3.5 w-3.5', contact.isPrimary && 'fill-current')} />
|
||||||
</button>
|
</button>
|
||||||
</>
|
|
||||||
) : null}
|
|
||||||
|
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
|
|||||||
183
src/components/clients/dedup-suggestion-panel.tsx
Normal file
183
src/components/clients/dedup-suggestion-panel.tsx
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
'use client';
|
||||||
|
|
||||||
|
import { useEffect, useState } from 'react';
|
||||||
|
import { useQuery } from '@tanstack/react-query';
|
||||||
|
import { AlertCircle, ArrowRight, Briefcase, X } from 'lucide-react';
|
||||||
|
|
||||||
|
import { Button } from '@/components/ui/button';
|
||||||
|
import { apiFetch } from '@/lib/api/client';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
|
||||||
|
interface MatchData {
|
||||||
|
clientId: string;
|
||||||
|
fullName: string;
|
||||||
|
score: number;
|
||||||
|
confidence: 'high' | 'medium' | 'low';
|
||||||
|
reasons: string[];
|
||||||
|
interestCount: number;
|
||||||
|
emails: string[];
|
||||||
|
phonesE164: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DedupSuggestionPanelProps {
|
||||||
|
/** Free-text inputs from the in-flight new-client form. The panel
|
||||||
|
* debounces them and queries /api/v1/clients/match-candidates. */
|
||||||
|
email?: string | null;
|
||||||
|
phone?: string | null;
|
||||||
|
name?: string | null;
|
||||||
|
/** Caller wants to attach the new interest to an existing client
|
||||||
|
* rather than creating a new one. The form switches to
|
||||||
|
* interest-only mode and pre-fills the client. */
|
||||||
|
onUseExisting: (match: MatchData) => void;
|
||||||
|
/** User explicitly said "create new anyway." Hide the panel until
|
||||||
|
* they change input again. */
|
||||||
|
onDismiss?: () => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Surfaces existing clients that match the form's in-flight inputs.
|
||||||
|
*
|
||||||
|
* Renders nothing while inputs are short / no useful match found.
|
||||||
|
* On a high-confidence match, the panel interrupts visually with a
|
||||||
|
* solid border and a primary "Use this client" button.
|
||||||
|
*
|
||||||
|
* Wired into the new-client form. Skipped in edit mode.
|
||||||
|
*/
|
||||||
|
export function DedupSuggestionPanel({
|
||||||
|
email,
|
||||||
|
phone,
|
||||||
|
name,
|
||||||
|
onUseExisting,
|
||||||
|
onDismiss,
|
||||||
|
}: DedupSuggestionPanelProps) {
|
||||||
|
const [dismissed, setDismissed] = useState(false);
|
||||||
|
|
||||||
|
// Debounce inputs by 300ms so we don't fire on every keystroke. Keep
|
||||||
|
// the latest debounced values in component state.
|
||||||
|
const [debounced, setDebounced] = useState({
|
||||||
|
email: email ?? '',
|
||||||
|
phone: phone ?? '',
|
||||||
|
name: name ?? '',
|
||||||
|
});
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const t = setTimeout(() => {
|
||||||
|
setDebounced({ email: email ?? '', phone: phone ?? '', name: name ?? '' });
|
||||||
|
// Clear the dismissed flag when inputs change — the user typed
|
||||||
|
// something new, so the prior dismissal no longer applies.
|
||||||
|
setDismissed(false);
|
||||||
|
}, 300);
|
||||||
|
return () => clearTimeout(t);
|
||||||
|
}, [email, phone, name]);
|
||||||
|
|
||||||
|
const hasSomething =
|
||||||
|
debounced.email.length > 3 || debounced.phone.length > 3 || debounced.name.length > 2;
|
||||||
|
|
||||||
|
const { data, isFetching } = useQuery<{ data: MatchData[] }>({
|
||||||
|
queryKey: ['dedup-match-candidates', debounced],
|
||||||
|
queryFn: () => {
|
||||||
|
const params = new URLSearchParams();
|
||||||
|
if (debounced.email) params.set('email', debounced.email);
|
||||||
|
if (debounced.phone) params.set('phone', debounced.phone);
|
||||||
|
if (debounced.name) params.set('name', debounced.name);
|
||||||
|
return apiFetch<{ data: MatchData[] }>(`/api/v1/clients/match-candidates?${params}`);
|
||||||
|
},
|
||||||
|
enabled: hasSomething && !dismissed,
|
||||||
|
// Same query is fine to cache for a minute — moves are slow at this layer.
|
||||||
|
staleTime: 60_000,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (dismissed) return null;
|
||||||
|
if (!hasSomething) return null;
|
||||||
|
if (isFetching && !data) return null;
|
||||||
|
const matches = data?.data ?? [];
|
||||||
|
if (matches.length === 0) return null;
|
||||||
|
|
||||||
|
const top = matches[0]!;
|
||||||
|
const isHigh = top.confidence === 'high';
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
className={cn(
|
||||||
|
'rounded-lg border p-3 mb-3 transition-colors',
|
||||||
|
isHigh
|
||||||
|
? 'border-amber-300 bg-amber-50/60 dark:bg-amber-950/30'
|
||||||
|
: 'border-border bg-muted/40',
|
||||||
|
)}
|
||||||
|
data-testid="dedup-suggestion"
|
||||||
|
>
|
||||||
|
<div className="flex items-start gap-3">
|
||||||
|
<div className="mt-0.5">
|
||||||
|
<AlertCircle
|
||||||
|
className={cn(
|
||||||
|
'size-5',
|
||||||
|
isHigh ? 'text-amber-700 dark:text-amber-400' : 'text-muted-foreground',
|
||||||
|
)}
|
||||||
|
aria-hidden
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="min-w-0 flex-1">
|
||||||
|
<p className="text-sm font-semibold leading-tight">
|
||||||
|
{isHigh
|
||||||
|
? 'This looks like an existing client'
|
||||||
|
: 'Possible match — check before creating'}
|
||||||
|
</p>
|
||||||
|
<div className="mt-2 rounded-md border bg-background/80 p-2.5">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<p className="truncate text-sm font-medium">{top.fullName}</p>
|
||||||
|
<span
|
||||||
|
className={cn(
|
||||||
|
'shrink-0 rounded-full px-1.5 py-0.5 text-[10px] font-medium uppercase tracking-wide',
|
||||||
|
isHigh
|
||||||
|
? 'bg-amber-200 text-amber-900 dark:bg-amber-800 dark:text-amber-100'
|
||||||
|
: 'bg-muted text-muted-foreground',
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
{top.confidence}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div className="mt-0.5 flex flex-wrap items-center gap-x-3 gap-y-0.5 text-xs text-muted-foreground">
|
||||||
|
{top.emails[0] ? <span className="truncate">{top.emails[0]}</span> : null}
|
||||||
|
{top.phonesE164[0] ? <span>{top.phonesE164[0]}</span> : null}
|
||||||
|
<span className="inline-flex items-center gap-1">
|
||||||
|
<Briefcase className="size-3" aria-hidden />
|
||||||
|
{top.interestCount} {top.interestCount === 1 ? 'interest' : 'interests'}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p className="mt-1.5 text-[11px] text-muted-foreground">{top.reasons.join(' · ')}</p>
|
||||||
|
</div>
|
||||||
|
<div className="mt-3 flex flex-wrap items-center gap-2">
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
size="sm"
|
||||||
|
onClick={() => onUseExisting(top)}
|
||||||
|
data-testid="dedup-use-existing"
|
||||||
|
>
|
||||||
|
Use this client
|
||||||
|
<ArrowRight className="ml-1 size-3.5" aria-hidden />
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
size="sm"
|
||||||
|
variant="ghost"
|
||||||
|
onClick={() => {
|
||||||
|
setDismissed(true);
|
||||||
|
onDismiss?.();
|
||||||
|
}}
|
||||||
|
data-testid="dedup-dismiss"
|
||||||
|
>
|
||||||
|
<X className="mr-1 size-3.5" aria-hidden />
|
||||||
|
Create new anyway
|
||||||
|
</Button>
|
||||||
|
{matches.length > 1 ? (
|
||||||
|
<span className="text-xs text-muted-foreground">
|
||||||
|
+{matches.length - 1} other possible{' '}
|
||||||
|
{matches.length - 1 === 1 ? 'match' : 'matches'}
|
||||||
|
</span>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -1,9 +1,16 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useEffect, useRef, type ReactNode } from 'react';
|
import { type ReactNode } from 'react';
|
||||||
|
|
||||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
|
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
|
||||||
import { Badge } from '@/components/ui/badge';
|
import { Badge } from '@/components/ui/badge';
|
||||||
|
import {
|
||||||
|
Select,
|
||||||
|
SelectContent,
|
||||||
|
SelectItem,
|
||||||
|
SelectTrigger,
|
||||||
|
SelectValue,
|
||||||
|
} from '@/components/ui/select';
|
||||||
|
|
||||||
export interface ResponsiveTab {
|
export interface ResponsiveTab {
|
||||||
id: string;
|
id: string;
|
||||||
@@ -19,56 +26,47 @@ interface ResponsiveTabsProps {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tab strip that scrolls horizontally on narrow viewports. The active tab is
|
* Tabs that collapse to a native <Select> on phone-sized viewports.
|
||||||
* automatically scrolled into view so users can tell at a glance that more
|
* Above sm: TabsList renders. At/below sm: a Select dropdown replaces the tab strip.
|
||||||
* tabs exist beyond the visible edge.
|
|
||||||
*
|
|
||||||
* Previously this collapsed to a <Select> on phone widths, but that read as
|
|
||||||
* a generic dropdown and obscured the fact that multiple peer tabs exist.
|
|
||||||
*/
|
*/
|
||||||
export function ResponsiveTabs({ tabs, value, onValueChange }: ResponsiveTabsProps) {
|
export function ResponsiveTabs({ tabs, value, onValueChange }: ResponsiveTabsProps) {
|
||||||
const listRef = useRef<HTMLDivElement>(null);
|
|
||||||
|
|
||||||
// Keep the active trigger in view when the value changes externally
|
|
||||||
// (e.g. ?tab= in the URL or a back/forward navigation).
|
|
||||||
useEffect(() => {
|
|
||||||
const root = listRef.current;
|
|
||||||
if (!root) return;
|
|
||||||
const active = root.querySelector<HTMLButtonElement>(`[data-tab-id="${CSS.escape(value)}"]`);
|
|
||||||
if (active) {
|
|
||||||
active.scrollIntoView({ block: 'nearest', inline: 'nearest', behavior: 'smooth' });
|
|
||||||
}
|
|
||||||
}, [value]);
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Tabs value={value} onValueChange={onValueChange}>
|
<Tabs value={value} onValueChange={onValueChange}>
|
||||||
{/* Single scrollable strip for all viewport widths.
|
{/* Mobile: select dropdown */}
|
||||||
The wrapper handles horizontal overflow with momentum scroll on
|
<div className="sm:hidden">
|
||||||
touch devices; the inner TabsList stays its natural width and
|
<Select value={value} onValueChange={onValueChange}>
|
||||||
slides under the wrapper. */}
|
<SelectTrigger>
|
||||||
<div
|
<SelectValue />
|
||||||
ref={listRef}
|
</SelectTrigger>
|
||||||
className="overflow-x-auto -mx-2 px-2 [scrollbar-width:none] [&::-webkit-scrollbar]:hidden"
|
<SelectContent>
|
||||||
>
|
{tabs.map((tab) => (
|
||||||
<TabsList className="inline-flex w-max">
|
<SelectItem key={tab.id} value={tab.id}>
|
||||||
{tabs.map((tab) => (
|
<span className="flex items-center gap-1.5">
|
||||||
<TabsTrigger
|
{tab.label}
|
||||||
key={tab.id}
|
{tab.badge !== undefined && tab.badge !== null && (
|
||||||
value={tab.id}
|
<span className="text-xs text-muted-foreground">({tab.badge})</span>
|
||||||
className="gap-1.5 whitespace-nowrap"
|
)}
|
||||||
data-tab-id={tab.id}
|
</span>
|
||||||
>
|
</SelectItem>
|
||||||
{tab.label}
|
))}
|
||||||
{tab.badge !== undefined && tab.badge !== null && (
|
</SelectContent>
|
||||||
<Badge variant="secondary" className="px-1.5 py-0 text-xs">
|
</Select>
|
||||||
{tab.badge}
|
|
||||||
</Badge>
|
|
||||||
)}
|
|
||||||
</TabsTrigger>
|
|
||||||
))}
|
|
||||||
</TabsList>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Desktop / tablet: tab strip */}
|
||||||
|
<TabsList className="hidden sm:flex">
|
||||||
|
{tabs.map((tab) => (
|
||||||
|
<TabsTrigger key={tab.id} value={tab.id} className="gap-1.5">
|
||||||
|
{tab.label}
|
||||||
|
{tab.badge !== undefined && tab.badge !== null && (
|
||||||
|
<Badge variant="secondary" className="px-1.5 py-0 text-xs">
|
||||||
|
{tab.badge}
|
||||||
|
</Badge>
|
||||||
|
)}
|
||||||
|
</TabsTrigger>
|
||||||
|
))}
|
||||||
|
</TabsList>
|
||||||
|
|
||||||
{tabs.map((tab) => (
|
{tabs.map((tab) => (
|
||||||
<TabsContent key={tab.id} value={tab.id} className="mt-4">
|
<TabsContent key={tab.id} value={tab.id} className="mt-4">
|
||||||
{tab.content}
|
{tab.content}
|
||||||
|
|||||||
30
src/lib/db/migrations/0021_unusual_azazel.sql
Normal file
30
src/lib/db/migrations/0021_unusual_azazel.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
CREATE TABLE "client_merge_candidates" (
|
||||||
|
"id" text PRIMARY KEY NOT NULL,
|
||||||
|
"port_id" text NOT NULL,
|
||||||
|
"client_a_id" text NOT NULL,
|
||||||
|
"client_b_id" text NOT NULL,
|
||||||
|
"score" integer NOT NULL,
|
||||||
|
"reasons" jsonb NOT NULL,
|
||||||
|
"status" text DEFAULT 'pending' NOT NULL,
|
||||||
|
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
|
||||||
|
"resolved_at" timestamp with time zone,
|
||||||
|
"resolved_by" text
|
||||||
|
);
|
||||||
|
--> statement-breakpoint
|
||||||
|
CREATE TABLE "migration_source_links" (
|
||||||
|
"id" text PRIMARY KEY NOT NULL,
|
||||||
|
"source_system" text NOT NULL,
|
||||||
|
"source_id" text NOT NULL,
|
||||||
|
"target_entity_type" text NOT NULL,
|
||||||
|
"target_entity_id" text NOT NULL,
|
||||||
|
"applied_id" text NOT NULL,
|
||||||
|
"applied_by" text,
|
||||||
|
"applied_at" timestamp with time zone DEFAULT now() NOT NULL
|
||||||
|
);
|
||||||
|
--> statement-breakpoint
|
||||||
|
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_port_id_ports_id_fk" FOREIGN KEY ("port_id") REFERENCES "public"."ports"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
|
||||||
|
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_client_a_id_clients_id_fk" FOREIGN KEY ("client_a_id") REFERENCES "public"."clients"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
|
||||||
|
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_client_b_id_clients_id_fk" FOREIGN KEY ("client_b_id") REFERENCES "public"."clients"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
|
||||||
|
CREATE INDEX "idx_cmc_port_status" ON "client_merge_candidates" USING btree ("port_id","status");--> statement-breakpoint
|
||||||
|
CREATE UNIQUE INDEX "idx_cmc_pair" ON "client_merge_candidates" USING btree ("port_id","client_a_id","client_b_id");--> statement-breakpoint
|
||||||
|
CREATE UNIQUE INDEX "idx_msl_source_target" ON "migration_source_links" USING btree ("source_system","source_id","target_entity_type");
|
||||||
2
src/lib/db/migrations/0022_magenta_madame_hydra.sql
Normal file
2
src/lib/db/migrations/0022_magenta_madame_hydra.sql
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
ALTER TABLE "clients" ADD COLUMN "merged_into_client_id" text;--> statement-breakpoint
|
||||||
|
CREATE INDEX "idx_clients_merged_into" ON "clients" USING btree ("merged_into_client_id");
|
||||||
10482
src/lib/db/migrations/meta/0021_snapshot.json
Normal file
10482
src/lib/db/migrations/meta/0021_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
10503
src/lib/db/migrations/meta/0022_snapshot.json
Normal file
10503
src/lib/db/migrations/meta/0022_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -141,6 +141,20 @@
|
|||||||
"when": 1777671562738,
|
"when": 1777671562738,
|
||||||
"tag": "0019_lazy_vampiro",
|
"tag": "0019_lazy_vampiro",
|
||||||
"breakpoints": true
|
"breakpoints": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 21,
|
||||||
|
"version": "7",
|
||||||
|
"when": 1777811835982,
|
||||||
|
"tag": "0021_unusual_azazel",
|
||||||
|
"breakpoints": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 22,
|
||||||
|
"version": "7",
|
||||||
|
"when": 1777812671833,
|
||||||
|
"tag": "0022_magenta_madame_hydra",
|
||||||
|
"breakpoints": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import {
|
|||||||
pgTable,
|
pgTable,
|
||||||
text,
|
text,
|
||||||
boolean,
|
boolean,
|
||||||
|
integer,
|
||||||
timestamp,
|
timestamp,
|
||||||
jsonb,
|
jsonb,
|
||||||
index,
|
index,
|
||||||
@@ -30,6 +31,11 @@ export const clients = pgTable(
|
|||||||
source: text('source'), // website, manual, referral, broker
|
source: text('source'), // website, manual, referral, broker
|
||||||
sourceDetails: text('source_details'),
|
sourceDetails: text('source_details'),
|
||||||
archivedAt: timestamp('archived_at', { withTimezone: true }),
|
archivedAt: timestamp('archived_at', { withTimezone: true }),
|
||||||
|
/** When this client was merged into another (the "loser" of a dedup
|
||||||
|
* merge), this points at the surviving client. Used by the
|
||||||
|
* /admin/duplicates review queue to redirect any stragglers, and by
|
||||||
|
* the unmerge flow to restore. Null for live clients. */
|
||||||
|
mergedIntoClientId: text('merged_into_client_id'),
|
||||||
createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
|
createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
|
||||||
updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(),
|
updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(),
|
||||||
},
|
},
|
||||||
@@ -38,6 +44,7 @@ export const clients = pgTable(
|
|||||||
index('idx_clients_name').on(table.portId, table.fullName),
|
index('idx_clients_name').on(table.portId, table.fullName),
|
||||||
index('idx_clients_archived').on(table.portId, table.archivedAt),
|
index('idx_clients_archived').on(table.portId, table.archivedAt),
|
||||||
index('idx_clients_nationality_iso').on(table.nationalityIso),
|
index('idx_clients_nationality_iso').on(table.nationalityIso),
|
||||||
|
index('idx_clients_merged_into').on(table.mergedIntoClientId),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -145,6 +152,54 @@ export const clientMergeLog = pgTable(
|
|||||||
(table) => [index('idx_cml_port').on(table.portId)],
|
(table) => [index('idx_cml_port').on(table.portId)],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pairs of clients flagged by the background scoring job as potential
|
||||||
|
* duplicates. The `/admin/duplicates` review queue reads from here.
|
||||||
|
*
|
||||||
|
* Lifecycle:
|
||||||
|
* - Background job inserts a row when a pair scores >= the
|
||||||
|
* `dedup_review_queue_threshold` system setting.
|
||||||
|
* - User reviews in the admin UI and either merges (status='merged')
|
||||||
|
* or dismisses (status='dismissed').
|
||||||
|
* - Subsequent runs of the scoring job skip pairs already
|
||||||
|
* `dismissed` so the same false-positive doesn't keep reappearing.
|
||||||
|
* A future score increase recreates the row.
|
||||||
|
*
|
||||||
|
* Pairs are stored canonically with `clientAId < clientBId` (string
|
||||||
|
* comparison) so the same pair only generates one row regardless of
|
||||||
|
* scoring direction.
|
||||||
|
*/
|
||||||
|
export const clientMergeCandidates = pgTable(
|
||||||
|
'client_merge_candidates',
|
||||||
|
{
|
||||||
|
id: text('id')
|
||||||
|
.primaryKey()
|
||||||
|
.$defaultFn(() => crypto.randomUUID()),
|
||||||
|
portId: text('port_id')
|
||||||
|
.notNull()
|
||||||
|
.references(() => ports.id),
|
||||||
|
clientAId: text('client_a_id')
|
||||||
|
.notNull()
|
||||||
|
.references(() => clients.id, { onDelete: 'cascade' }),
|
||||||
|
clientBId: text('client_b_id')
|
||||||
|
.notNull()
|
||||||
|
.references(() => clients.id, { onDelete: 'cascade' }),
|
||||||
|
score: integer('score').notNull(),
|
||||||
|
/** Human-readable rule list, e.g. ["email match", "phone match"]. */
|
||||||
|
reasons: jsonb('reasons').notNull(),
|
||||||
|
status: text('status').notNull().default('pending'), // pending | dismissed | merged
|
||||||
|
createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
|
||||||
|
resolvedAt: timestamp('resolved_at', { withTimezone: true }),
|
||||||
|
resolvedBy: text('resolved_by'),
|
||||||
|
},
|
||||||
|
(table) => [
|
||||||
|
index('idx_cmc_port_status').on(table.portId, table.status),
|
||||||
|
// Same pair shouldn't surface twice — enforce uniqueness on the
|
||||||
|
// canonical (a < b) ordering.
|
||||||
|
uniqueIndex('idx_cmc_pair').on(table.portId, table.clientAId, table.clientBId),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
export const clientAddresses = pgTable(
|
export const clientAddresses = pgTable(
|
||||||
'client_addresses',
|
'client_addresses',
|
||||||
{
|
{
|
||||||
@@ -190,3 +245,5 @@ export type ClientMergeLog = typeof clientMergeLog.$inferSelect;
|
|||||||
export type NewClientMergeLog = typeof clientMergeLog.$inferInsert;
|
export type NewClientMergeLog = typeof clientMergeLog.$inferInsert;
|
||||||
export type ClientAddress = typeof clientAddresses.$inferSelect;
|
export type ClientAddress = typeof clientAddresses.$inferSelect;
|
||||||
export type NewClientAddress = typeof clientAddresses.$inferInsert;
|
export type NewClientAddress = typeof clientAddresses.$inferInsert;
|
||||||
|
export type ClientMergeCandidate = typeof clientMergeCandidates.$inferSelect;
|
||||||
|
export type NewClientMergeCandidate = typeof clientMergeCandidates.$inferInsert;
|
||||||
|
|||||||
@@ -56,5 +56,8 @@ export * from './ai-usage';
|
|||||||
// GDPR export tracking (Phase 3d)
|
// GDPR export tracking (Phase 3d)
|
||||||
export * from './gdpr';
|
export * from './gdpr';
|
||||||
|
|
||||||
|
// Migration ledger (one-shot scripts — NocoDB import etc.)
|
||||||
|
export * from './migration';
|
||||||
|
|
||||||
// Relations (must come last — references all tables)
|
// Relations (must come last — references all tables)
|
||||||
export * from './relations';
|
export * from './relations';
|
||||||
|
|||||||
48
src/lib/db/schema/migration.ts
Normal file
48
src/lib/db/schema/migration.ts
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import { pgTable, text, timestamp, uniqueIndex } from 'drizzle-orm/pg-core';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Idempotency ledger for one-shot data migrations from external sources
|
||||||
|
* (e.g. the legacy NocoDB Interests table).
|
||||||
|
*
|
||||||
|
* Every entity created during a migration script's `--apply` run gets a
|
||||||
|
* row here mapping the source-system row identifier to the new-system
|
||||||
|
* entity id. Re-running `--apply` against the same report skips rows
|
||||||
|
* already linked, so partial-failure resumption is just "run again."
|
||||||
|
*
|
||||||
|
* One source row can generate multiple new entities (e.g. one NocoDB
|
||||||
|
* Interests row → one client + one interest + one yacht), so the
|
||||||
|
* uniqueness constraint includes `target_entity_type`.
|
||||||
|
*/
|
||||||
|
export const migrationSourceLinks = pgTable(
  'migration_source_links',
  {
    id: text('id')
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    /** e.g. 'nocodb_interests', 'nocodb_residences', 'nocodb_website_submissions'. */
    sourceSystem: text('source_system').notNull(),
    /** Source row identifier as a string (NocoDB IDs are integers; we keep
     * text here for forward compat with other sources). */
    sourceId: text('source_id').notNull(),
    /** e.g. 'client', 'interest', 'yacht', 'document'. */
    targetEntityType: text('target_entity_type').notNull(),
    /** UUID of the new-system entity (clients.id, interests.id, etc.). */
    targetEntityId: text('target_entity_id').notNull(),
    /** Apply-id from the migration run that created this link — pairs with
     * the on-disk apply manifest so `--rollback --apply-id <id>` knows
     * exactly which links to remove. */
    appliedId: text('applied_id').notNull(),
    // Nullable — presumably the operator who ran --apply; confirm once the
    // apply phase lands (NOTE(review): not populated anywhere visible here).
    appliedBy: text('applied_by'),
    appliedAt: timestamp('applied_at', { withTimezone: true }).notNull().defaultNow(),
  },
  (table) => [
    // One source row may fan out into several new entities, so uniqueness
    // must include the target entity type — (system, id) alone would
    // reject the client+interest+yacht case.
    uniqueIndex('idx_msl_source_target').on(
      table.sourceSystem,
      table.sourceId,
      table.targetEntityType,
    ),
  ],
);

/** Row shape as read back from the database. */
export type MigrationSourceLink = typeof migrationSourceLinks.$inferSelect;
/** Row shape accepted by inserts (defaults may be omitted). */
export type NewMigrationSourceLink = typeof migrationSourceLinks.$inferInsert;
|
||||||
255
src/lib/dedup/find-matches.ts
Normal file
255
src/lib/dedup/find-matches.ts
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
/**
|
||||||
|
* Client-match finder — pure scoring logic.
|
||||||
|
*
|
||||||
|
* Compares one input candidate against a pool of existing candidates and
|
||||||
|
* returns scored matches. Used by:
|
||||||
|
* - the at-create suggestion in client/interest forms (Layer 1)
|
||||||
|
* - the public-form auto-link path (when score >= block threshold)
|
||||||
|
* - the nightly background scoring job (Layer 3)
|
||||||
|
* - the migration script's dedup pass
|
||||||
|
*
|
||||||
|
* Performance shape: blocking via email / phone / surname-token reduces
|
||||||
|
* the pairwise scan from O(n²) to ~O(n) for any pool size we'll see in
|
||||||
|
* production. See `findClientMatches` for the blocking implementation.
|
||||||
|
*
|
||||||
|
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §4.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { parsePhoneScriptSafe as parsePhone } from './phone-parse';
|
||||||
|
|
||||||
|
import { levenshtein } from './normalize';
|
||||||
|
|
||||||
|
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface MatchCandidate {
  /** Stable identifier — used to dedupe the comparison set and to skip
   * self-matches when re-scoring an existing client. */
  id: string;
  fullName: string | null;
  /** Lowercased last non-particle token from `normalizeName(...).surnameToken`.
   * Used as a blocking key. */
  surnameToken: string | null;
  /** Already lowercased + validated via `normalizeEmail`. */
  emails: string[];
  /** Already canonical E.164 via `normalizePhone`. */
  phonesE164: string[];
  /** Address country (NOT phone country) — used for tiebreaking, not scoring. */
  countryIso: string | null;
}

export type MatchConfidence = 'high' | 'medium' | 'low';

export interface MatchResult {
  candidate: MatchCandidate;
  /** 0–100 after capping. */
  score: number;
  /** Human-readable list of which rules contributed. Useful for the
   * review queue UI ("matched on email + phone + surname token"). */
  reasons: string[];
  confidence: MatchConfidence;
}

export interface DedupThresholds {
  /** Inclusive lower bound for `'high'` confidence. */
  highScore: number;
  /** Inclusive lower bound for `'medium'` confidence. Below this is `'low'`. */
  mediumScore: number;
}
|
||||||
|
|
||||||
|
// ─── Public entry point ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare `input` against every reachable candidate in `pool` and return
|
||||||
|
* scored matches, sorted by score descending. The result list includes
|
||||||
|
* low-confidence hits — caller filters by `confidence` or `score`
|
||||||
|
* depending on use case.
|
||||||
|
*
|
||||||
|
* Self-matches (an entry with `id === input.id`, e.g. when re-scoring an
|
||||||
|
* existing client during a background job) are excluded.
|
||||||
|
*/
|
||||||
|
export function findClientMatches(
|
||||||
|
input: MatchCandidate,
|
||||||
|
pool: MatchCandidate[],
|
||||||
|
thresholds: DedupThresholds,
|
||||||
|
): MatchResult[] {
|
||||||
|
if (pool.length === 0) return [];
|
||||||
|
|
||||||
|
// ── Phase 1: build blocking indexes off the pool. ─────────────────────────
|
||||||
|
//
|
||||||
|
// Three indexes mean any candidate that shares ANY of (email / phone /
|
||||||
|
// surname-token) with the input shows up in the comparison set. Anything
|
||||||
|
// that shares NONE is structurally too different to be a duplicate and
|
||||||
|
// is skipped — this is what keeps the algorithm O(n) at scale.
|
||||||
|
const byEmail = new Map<string, MatchCandidate[]>();
|
||||||
|
const byPhone = new Map<string, MatchCandidate[]>();
|
||||||
|
const bySurnameToken = new Map<string, MatchCandidate[]>();
|
||||||
|
|
||||||
|
for (const c of pool) {
|
||||||
|
if (c.id === input.id) continue;
|
||||||
|
for (const email of c.emails) {
|
||||||
|
pushTo(byEmail, email, c);
|
||||||
|
}
|
||||||
|
for (const phone of c.phonesE164) {
|
||||||
|
pushTo(byPhone, phone, c);
|
||||||
|
}
|
||||||
|
if (c.surnameToken) {
|
||||||
|
pushTo(bySurnameToken, c.surnameToken, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Phase 2: gather the comparison set via the blocking indexes. ─────────
|
||||||
|
const comparisonSet = new Map<string, MatchCandidate>();
|
||||||
|
for (const email of input.emails) {
|
||||||
|
for (const c of byEmail.get(email) ?? []) {
|
||||||
|
comparisonSet.set(c.id, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const phone of input.phonesE164) {
|
||||||
|
for (const c of byPhone.get(phone) ?? []) {
|
||||||
|
comparisonSet.set(c.id, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (input.surnameToken) {
|
||||||
|
for (const c of bySurnameToken.get(input.surnameToken) ?? []) {
|
||||||
|
comparisonSet.set(c.id, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Phase 3: score every candidate that survived blocking. ───────────────
|
||||||
|
const results: MatchResult[] = [];
|
||||||
|
for (const candidate of comparisonSet.values()) {
|
||||||
|
const r = scorePair(input, candidate);
|
||||||
|
results.push(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Phase 4: sort by score desc + assign confidence tier. ────────────────
|
||||||
|
results.sort((a, b) => b.score - a.score);
|
||||||
|
for (const r of results) {
|
||||||
|
r.confidence = classify(r.score, thresholds);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Scoring ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Score one (input, candidate) pair against the rule set in design §4.2.
|
||||||
|
* Compounding: positive rules sum, negative rules subtract; the result is
|
||||||
|
* clamped to [0, 100]. Reasons accumulate in the order rules fire so the
|
||||||
|
* review-queue UI can show "matched on email + phone".
|
||||||
|
*/
|
||||||
|
function scorePair(a: MatchCandidate, b: MatchCandidate): MatchResult {
|
||||||
|
let score = 0;
|
||||||
|
const reasons: string[] = [];
|
||||||
|
|
||||||
|
// ── Positive rules. ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const sharedEmail = a.emails.find((e) => b.emails.includes(e));
|
||||||
|
const emailMatch = !!sharedEmail;
|
||||||
|
if (emailMatch) {
|
||||||
|
score += 60;
|
||||||
|
reasons.push('email match');
|
||||||
|
}
|
||||||
|
|
||||||
|
const sharedPhone = a.phonesE164.find((p) => b.phonesE164.includes(p) && countDigits(p) >= 8);
|
||||||
|
const phoneMatch = !!sharedPhone;
|
||||||
|
if (phoneMatch) {
|
||||||
|
score += 50;
|
||||||
|
reasons.push('phone match');
|
||||||
|
}
|
||||||
|
|
||||||
|
const aNameNorm = (a.fullName ?? '').toLowerCase().trim();
|
||||||
|
const bNameNorm = (b.fullName ?? '').toLowerCase().trim();
|
||||||
|
const nameExactMatch = aNameNorm.length > 0 && aNameNorm === bNameNorm;
|
||||||
|
if (nameExactMatch) {
|
||||||
|
score += 20;
|
||||||
|
reasons.push('name match');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Surname + given-name fuzzy. Only fires when names are NOT exactly
|
||||||
|
// equal — avoids double-counting with the rule above. Catches
|
||||||
|
// 'Constanzo' / 'Costanzo', 'Marc' / 'Marcus' etc. when other contact
|
||||||
|
// signals confirm them.
|
||||||
|
if (!nameExactMatch && a.surnameToken && b.surnameToken && a.surnameToken === b.surnameToken) {
|
||||||
|
const aGiven = (a.fullName ?? '').toLowerCase().split(/\s+/)[0] ?? '';
|
||||||
|
const bGiven = (b.fullName ?? '').toLowerCase().split(/\s+/)[0] ?? '';
|
||||||
|
if (aGiven && bGiven && levenshtein(aGiven, bGiven) <= 1) {
|
||||||
|
score += 15;
|
||||||
|
reasons.push('surname + given-name fuzzy match');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Negative rules. ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Same email but the two parties' phone numbers belong to different
|
||||||
|
// countries. Common when one inbox is shared by spouses / coworkers
|
||||||
|
// and the actual phone owners are distinct people. Don't auto-merge.
|
||||||
|
if (emailMatch && !phoneMatch && a.phonesE164.length > 0 && b.phonesE164.length > 0) {
|
||||||
|
const aCountries = phoneCountriesOf(a);
|
||||||
|
const bCountries = phoneCountriesOf(b);
|
||||||
|
const overlap = [...aCountries].some((c) => bCountries.has(c));
|
||||||
|
if (!overlap && aCountries.size > 0 && bCountries.size > 0) {
|
||||||
|
score -= 15;
|
||||||
|
reasons.push('phone country mismatch (negative)');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same name but no contact match. Two distinct people with the same
|
||||||
|
// name (common for "John Smith") sneak through name-based blocking;
|
||||||
|
// penalize so the score lands below the auto-merge threshold.
|
||||||
|
if (nameExactMatch && !emailMatch && !phoneMatch) {
|
||||||
|
score -= 20;
|
||||||
|
reasons.push('name match but no shared contact (negative)');
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
candidate: b,
|
||||||
|
score: clamp(score, 0, 100),
|
||||||
|
reasons,
|
||||||
|
confidence: 'low', // assigned by caller after threshold lookup
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
function pushTo<K, V>(map: Map<K, V[]>, key: K, value: V): void {
|
||||||
|
const existing = map.get(key);
|
||||||
|
if (existing) {
|
||||||
|
existing.push(value);
|
||||||
|
} else {
|
||||||
|
map.set(key, [value]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function classify(score: number, thresholds: DedupThresholds): MatchConfidence {
|
||||||
|
if (score >= thresholds.highScore) return 'high';
|
||||||
|
if (score >= thresholds.mediumScore) return 'medium';
|
||||||
|
return 'low';
|
||||||
|
}
|
||||||
|
|
||||||
|
function clamp(value: number, min: number, max: number): number {
|
||||||
|
if (value < min) return min;
|
||||||
|
if (value > max) return max;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
function countDigits(s: string): number {
|
||||||
|
let count = 0;
|
||||||
|
for (let i = 0; i < s.length; i += 1) {
|
||||||
|
const code = s.charCodeAt(i);
|
||||||
|
if (code >= 48 && code <= 57) count += 1;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve each phone in a candidate to its ISO country code (via
|
||||||
|
* libphonenumber-js). Cached per call; the surrounding caller doesn't
|
||||||
|
* batch so we accept the parse cost.
|
||||||
|
*/
|
||||||
|
function phoneCountriesOf(c: MatchCandidate): Set<string> {
|
||||||
|
const out = new Set<string>();
|
||||||
|
for (const p of c.phonesE164) {
|
||||||
|
const parsed = parsePhone(p);
|
||||||
|
if (parsed.country) out.add(parsed.country);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
274
src/lib/dedup/migration-report.ts
Normal file
274
src/lib/dedup/migration-report.ts
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
/**
|
||||||
|
* Migration report writer — turns a `MigrationPlan` (from
|
||||||
|
* `migration-transform.ts`) into a CSV + a human-readable Markdown
|
||||||
|
* summary on disk under `.migration/<timestamp>/`.
|
||||||
|
*
|
||||||
|
* The CSV format is intentionally machine-friendly (one row per
|
||||||
|
* planned operation) so it can be diffed across runs and inspected
|
||||||
|
* by hand. The summary is designed for "open this in your editor and
|
||||||
|
* eyeball it for 5 minutes before --apply."
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { promises as fs } from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
|
||||||
|
import type { MigrationPlan } from './migration-transform';
|
||||||
|
|
||||||
|
// ─── Output directory ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface ReportPaths {
|
||||||
|
rootDir: string;
|
||||||
|
csvPath: string;
|
||||||
|
summaryPath: string;
|
||||||
|
planJsonPath: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resolve report paths relative to the worktree root. The timestamped
|
||||||
|
* directory is created lazily by `writeReport`. */
|
||||||
|
export function resolveReportPaths(
|
||||||
|
rootDir: string,
|
||||||
|
timestamp: string = new Date().toISOString().replace(/[:.]/g, '-'),
|
||||||
|
): ReportPaths {
|
||||||
|
const dir = path.join(rootDir, '.migration', timestamp);
|
||||||
|
return {
|
||||||
|
rootDir: dir,
|
||||||
|
csvPath: path.join(dir, 'report.csv'),
|
||||||
|
summaryPath: path.join(dir, 'summary.md'),
|
||||||
|
planJsonPath: path.join(dir, 'plan.json'),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── CSV row shape ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
interface CsvRow {
|
||||||
|
op: string; // create_client / create_contact / create_interest / auto_link / flag / needs_review
|
||||||
|
reason: string;
|
||||||
|
source_id: string;
|
||||||
|
target_table: string;
|
||||||
|
target_value: string;
|
||||||
|
confidence: string;
|
||||||
|
manual_review: 'true' | 'false';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trivial CSV escape: quote any cell that contains comma / quote / newline,
|
||||||
|
// double up internal quotes per RFC 4180. No need for a dependency.
|
||||||
|
function csvEscape(s: string): string {
|
||||||
|
if (/[",\n\r]/.test(s)) {
|
||||||
|
return `"${s.replace(/"/g, '""')}"`;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
function rowToCsvLine(r: CsvRow): string {
|
||||||
|
return [
|
||||||
|
r.op,
|
||||||
|
r.reason,
|
||||||
|
r.source_id,
|
||||||
|
r.target_table,
|
||||||
|
r.target_value,
|
||||||
|
r.confidence,
|
||||||
|
r.manual_review,
|
||||||
|
]
|
||||||
|
.map(csvEscape)
|
||||||
|
.join(',');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Build CSV ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export function buildCsv(plan: MigrationPlan): string {
|
||||||
|
const lines: string[] = [];
|
||||||
|
lines.push(
|
||||||
|
[
|
||||||
|
'op',
|
||||||
|
'reason',
|
||||||
|
'source_id',
|
||||||
|
'target_table',
|
||||||
|
'target_value',
|
||||||
|
'confidence',
|
||||||
|
'manual_review',
|
||||||
|
].join(','),
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const client of plan.clients) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'create_client',
|
||||||
|
reason: client.sourceIds.length > 1 ? 'auto-merged cluster' : 'new',
|
||||||
|
source_id: client.sourceIds.join('|'),
|
||||||
|
target_table: 'clients.fullName',
|
||||||
|
target_value: client.fullName,
|
||||||
|
confidence: 'N/A',
|
||||||
|
manual_review: 'false',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
for (const c of client.contacts) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'create_contact',
|
||||||
|
reason: c.flagged ?? 'new',
|
||||||
|
source_id: client.sourceIds.join('|'),
|
||||||
|
target_table: `clientContacts.${c.channel}`,
|
||||||
|
target_value: c.value,
|
||||||
|
confidence: 'N/A',
|
||||||
|
manual_review: c.flagged ? 'true' : 'false',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for (const a of client.addresses) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'create_address',
|
||||||
|
reason: 'address text present',
|
||||||
|
source_id: client.sourceIds.join('|'),
|
||||||
|
target_table: 'clientAddresses.countryIso',
|
||||||
|
target_value: a.countryIso ?? '(unresolved)',
|
||||||
|
confidence: a.countryConfidence ?? 'fallback',
|
||||||
|
manual_review: a.countryConfidence === 'fallback' || !a.countryIso ? 'true' : 'false',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const interest of plan.interests) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'create_interest',
|
||||||
|
reason: `pipelineStage=${interest.pipelineStage}`,
|
||||||
|
source_id: String(interest.sourceId),
|
||||||
|
target_table: 'interests',
|
||||||
|
target_value: `${interest.berthMooringNumber ?? '(no berth)'} / ${interest.yachtName ?? '(no yacht)'}`,
|
||||||
|
confidence: 'N/A',
|
||||||
|
manual_review: 'false',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const link of plan.autoLinks) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'auto_link',
|
||||||
|
reason: link.reasons.join(' + '),
|
||||||
|
source_id: `${link.leadSourceId}<-${link.mergedSourceIds.join(',')}`,
|
||||||
|
target_table: 'clients',
|
||||||
|
target_value: '(merged into lead)',
|
||||||
|
confidence: `score=${link.score}`,
|
||||||
|
manual_review: 'false',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const pair of plan.needsReview) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'needs_review',
|
||||||
|
reason: pair.reasons.join(' + '),
|
||||||
|
source_id: `${pair.aSourceId}<->${pair.bSourceId}`,
|
||||||
|
target_table: 'clients',
|
||||||
|
target_value: '(human review required)',
|
||||||
|
confidence: `score=${pair.score}`,
|
||||||
|
manual_review: 'true',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const flag of plan.flags) {
|
||||||
|
lines.push(
|
||||||
|
rowToCsvLine({
|
||||||
|
op: 'flag',
|
||||||
|
reason: flag.reason,
|
||||||
|
source_id: String(flag.sourceId),
|
||||||
|
target_table: flag.sourceTable,
|
||||||
|
target_value: JSON.stringify(flag.details ?? {}),
|
||||||
|
confidence: 'N/A',
|
||||||
|
manual_review: 'true',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return lines.join('\n') + '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Build summary markdown ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
|
||||||
|
const s = plan.stats;
|
||||||
|
const lines: string[] = [];
|
||||||
|
lines.push(`# Migration Dry-Run — ${generatedAt}`);
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Input');
|
||||||
|
lines.push(`- ${s.inputInterestRows} NocoDB Interests`);
|
||||||
|
lines.push(`- ${s.inputResidentialRows} NocoDB Residential Interests`);
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Outcome');
|
||||||
|
lines.push(`- ${s.outputClients} clients`);
|
||||||
|
lines.push(`- ${s.outputInterests} interests (one per source row, linked to deduped client)`);
|
||||||
|
lines.push(`- ${s.outputContacts} client_contacts`);
|
||||||
|
lines.push(`- ${s.outputAddresses} client_addresses`);
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Auto-linked clusters');
|
||||||
|
if (plan.autoLinks.length === 0) {
|
||||||
|
lines.push('_None — every input row maps to a unique client._');
|
||||||
|
} else {
|
||||||
|
for (const link of plan.autoLinks) {
|
||||||
|
const merged = link.mergedSourceIds.length;
|
||||||
|
lines.push(
|
||||||
|
`- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) — score ${link.score} via ${link.reasons.join(' + ')}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Pairs flagged for human review');
|
||||||
|
if (plan.needsReview.length === 0) {
|
||||||
|
lines.push('_None._');
|
||||||
|
} else {
|
||||||
|
for (const pair of plan.needsReview) {
|
||||||
|
lines.push(
|
||||||
|
`- Rows \`${pair.aSourceId}\` ↔ \`${pair.bSourceId}\` — score ${pair.score} (${pair.reasons.join(' + ')})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Data quality flags');
|
||||||
|
if (plan.flags.length === 0) {
|
||||||
|
lines.push('_No quality issues._');
|
||||||
|
} else {
|
||||||
|
const byReason = new Map<string, number>();
|
||||||
|
for (const f of plan.flags) {
|
||||||
|
byReason.set(f.reason, (byReason.get(f.reason) ?? 0) + 1);
|
||||||
|
}
|
||||||
|
for (const [reason, count] of [...byReason].sort((a, b) => b[1] - a[1])) {
|
||||||
|
lines.push(`- **${count}× ${reason}**`);
|
||||||
|
}
|
||||||
|
lines.push('');
|
||||||
|
lines.push('### Detail');
|
||||||
|
for (const f of plan.flags.slice(0, 30)) {
|
||||||
|
lines.push(
|
||||||
|
`- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` — \`${JSON.stringify(f.details)}\`` : ''}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (plan.flags.length > 30) {
|
||||||
|
lines.push(`- _… and ${plan.flags.length - 30} more (see report.csv for full list)_`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines.push('');
|
||||||
|
lines.push('## Next step');
|
||||||
|
lines.push('');
|
||||||
|
lines.push('Eyeball the auto-linked + flagged-for-review pairs above.');
|
||||||
|
lines.push('When satisfied, re-run the script with `--apply --report .migration/<this-dir>/`.');
|
||||||
|
lines.push('Apply will refuse to run if the source NocoDB has changed since this dry-run.');
|
||||||
|
|
||||||
|
return lines.join('\n') + '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Write to disk ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function writeReport(
|
||||||
|
paths: ReportPaths,
|
||||||
|
plan: MigrationPlan,
|
||||||
|
generatedAt: string,
|
||||||
|
): Promise<void> {
|
||||||
|
await fs.mkdir(paths.rootDir, { recursive: true });
|
||||||
|
await fs.writeFile(paths.csvPath, buildCsv(plan), 'utf-8');
|
||||||
|
await fs.writeFile(paths.summaryPath, buildSummary(plan, generatedAt), 'utf-8');
|
||||||
|
await fs.writeFile(paths.planJsonPath, JSON.stringify(plan, null, 2), 'utf-8');
|
||||||
|
}
|
||||||
576
src/lib/dedup/migration-transform.ts
Normal file
576
src/lib/dedup/migration-transform.ts
Normal file
@@ -0,0 +1,576 @@
|
|||||||
|
/**
|
||||||
|
* Pure transform: NocoDB snapshot → planned new-system entities + dedup result.
|
||||||
|
*
|
||||||
|
* Used by the migration script's `--dry-run` (to produce the report) and
|
||||||
|
* `--apply` (to actually write). Keeping this pure means the same code
|
||||||
|
* runs in both modes, in tests against the frozen fixture, and in the
|
||||||
|
* one-off CLI run against the live base.
|
||||||
|
*
|
||||||
|
* No side effects, no DB calls, no external services.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
normalizeName,
|
||||||
|
normalizeEmail,
|
||||||
|
normalizePhone,
|
||||||
|
resolveCountry,
|
||||||
|
type NormalizedPhone,
|
||||||
|
} from './normalize';
|
||||||
|
import { findClientMatches, type MatchCandidate } from './find-matches';
|
||||||
|
import type { CountryCode } from '@/lib/i18n/countries';
|
||||||
|
import type { NocoDbRow, NocoDbSnapshot } from './nocodb-source';
|
||||||
|
|
||||||
|
// ─── Plan output ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface PlannedClient {
  /** Stable id derived from the deduped cluster's lead row. Used by the
   * apply phase to reference newly-created clients before they exist
   * in the DB. */
  tempId: string;
  /** Source row IDs that contributed to this client (one if no duplicates,
   * many if dedup merged a cluster). */
  sourceIds: number[];
  fullName: string;
  surnameToken?: string;
  countryIso: CountryCode | null;
  preferredContactMethod: string | null;
  source: string | null;
  contacts: PlannedContact[];
  addresses: PlannedAddress[];
}

export interface PlannedContact {
  channel: 'email' | 'phone' | 'whatsapp' | 'other';
  /** Raw contact value as migrated. */
  value: string;
  /** Canonical E.164 form, when the value is a parseable phone. */
  valueE164?: string | null;
  valueCountry?: CountryCode | null;
  isPrimary: boolean;
  // When set, the string describes a data-quality problem and the report
  // marks this contact for manual review.
  flagged?: string;
}

export interface PlannedAddress {
  streetAddress: string | null;
  city: string | null;
  countryIso: CountryCode | null;
  /** When confidence is low, the migration script flags the row for
   * human review. */
  countryConfidence: 'exact' | 'fuzzy' | 'city' | 'fallback' | null;
}

export interface PlannedInterest {
  /** NocoDB row id this interest came from. */
  sourceId: number;
  /** tempId of the planned client this interest hangs off. */
  clientTempId: string;
  pipelineStage: string;
  leadCategory: string | null;
  source: string | null;
  notes: string | null;
  /** Mooring number; the apply phase resolves this to a berthId via the
   * new-system Berths table. */
  berthMooringNumber: string | null;
  yachtName: string | null;
  /** Date stamps for milestone columns. ISO strings if parseable. */
  dateEoiSent: string | null;
  dateEoiSigned: string | null;
  dateDepositReceived: string | null;
  dateContractSent: string | null;
  dateContractSigned: string | null;
  dateLastContact: string | null;
  /** Documenso linkage carried forward when present so the document
   * record can be stitched up downstream. */
  documensoId: string | null;
}

/** One data-quality problem found while transforming a source row. */
export interface MigrationFlag {
  sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions';
  sourceId: number;
  reason: string;
  details?: Record<string, unknown>;
}

/** Full output of `transformSnapshot` — everything --dry-run reports and
 * --apply would write. */
export interface MigrationPlan {
  clients: PlannedClient[];
  interests: PlannedInterest[];
  flags: MigrationFlag[];
  /** Pairs that the migration would auto-link (high score). */
  autoLinks: Array<{
    leadSourceId: number;
    mergedSourceIds: number[];
    score: number;
    reasons: string[];
  }>;
  /** Pairs that need human review (medium score). Each pair shows up
   * in the migration report; the user resolves before --apply. */
  needsReview: Array<{ aSourceId: number; bSourceId: number; score: number; reasons: string[] }>;
  stats: MigrationStats;
}

/** Aggregate counts shown at the top of the report summary. */
export interface MigrationStats {
  inputInterestRows: number;
  inputResidentialRows: number;
  outputClients: number;
  outputInterests: number;
  outputContacts: number;
  outputAddresses: number;
  flaggedRows: number;
  autoLinkedClusters: number;
  needsReviewPairs: number;
}

export interface TransformOptions {
  /** ISO country used when a phone has no prefix and the row has no
   * Place of Residence. Defaults to AI (Anguilla / Port Nimara's home). */
  defaultPhoneCountry: CountryCode;
  /** Score thresholds for auto-link vs human review. Should match the
   * per-port `system_settings` values once the runtime UI is in place. */
  thresholds: {
    autoLink: number;
    needsReview: number;
  };
}
|
||||||
|
|
||||||
|
// Defaults used when the caller passes no overrides to `transformSnapshot`.
const DEFAULT_OPTIONS: TransformOptions = {
  defaultPhoneCountry: 'AI',
  thresholds: { autoLink: 90, needsReview: 50 },
};

// ─── Stage mapping ──────────────────────────────────────────────────────────

// Legacy NocoDB stage labels → new-system pipeline-stage slugs. Note both
// "Contract Negotiation" variants collapse into 'contract_sent'. Labels not
// listed here fall outside this map — NOTE(review): confirm how unmapped
// stages are handled by the row transform (defined further down).
const STAGE_MAP: Record<string, string> = {
  'General Qualified Interest': 'open',
  'Specific Qualified Interest': 'details_sent',
  'EOI and NDA Sent': 'eoi_sent',
  'Signed EOI and NDA': 'eoi_signed',
  'Made Reservation': 'deposit_10pct',
  'Contract Negotiation': 'contract_sent',
  'Contract Negotiations Finalized': 'contract_sent',
  'Contract Signed': 'contract_signed',
};

// Legacy lead-category labels → new slugs; both known labels collapse into
// 'general_interest'.
const LEAD_CATEGORY_MAP: Record<string, string> = {
  General: 'general_interest',
  'Friends and Family': 'general_interest',
};

// Legacy source labels → new source slugs ('portal'/'Form' both count as
// website submissions).
const SOURCE_MAP: Record<string, string> = {
  portal: 'website',
  Form: 'website',
  External: 'manual',
};
|
||||||
|
|
||||||
|
// ─── Date parsing ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a date the legacy NocoDB might have stored in DD-MM-YYYY,
|
||||||
|
* DD/MM/YYYY, YYYY-MM-DD, or ISO format. Returns ISO string or null.
|
||||||
|
*/
|
||||||
|
function parseFlexibleDate(input: unknown): string | null {
|
||||||
|
if (typeof input !== 'string' || input.trim() === '') return null;
|
||||||
|
const s = input.trim();
|
||||||
|
|
||||||
|
// Already ISO
|
||||||
|
if (/^\d{4}-\d{2}-\d{2}/.test(s)) {
|
||||||
|
const d = new Date(s);
|
||||||
|
return Number.isNaN(d.getTime()) ? null : d.toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// DD-MM-YYYY or DD/MM/YYYY
|
||||||
|
const m = s.match(/^(\d{1,2})[-/](\d{1,2})[-/](\d{4})$/);
|
||||||
|
if (m) {
|
||||||
|
const [, day, month, year] = m;
|
||||||
|
const iso = `${year}-${month!.padStart(2, '0')}-${day!.padStart(2, '0')}`;
|
||||||
|
const d = new Date(iso);
|
||||||
|
return Number.isNaN(d.getTime()) ? null : d.toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Anything else: try Date constructor as a last resort
|
||||||
|
const d = new Date(s);
|
||||||
|
return Number.isNaN(d.getTime()) ? null : d.toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Main transform ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the full transform pipeline against a NocoDB snapshot. Pure
|
||||||
|
* function — same input always produces the same plan.
|
||||||
|
*/
|
||||||
|
export function transformSnapshot(
|
||||||
|
snapshot: NocoDbSnapshot,
|
||||||
|
options: Partial<TransformOptions> = {},
|
||||||
|
): MigrationPlan {
|
||||||
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
||||||
|
|
||||||
|
const flags: MigrationFlag[] = [];
|
||||||
|
// Build per-row candidates first so we can run dedup before assigning
|
||||||
|
// tempIds (clients with multiple source rows merge into one tempId).
|
||||||
|
const perRow = snapshot.interests.map((row) => rowToCandidate(row, 'interests', opts, flags));
|
||||||
|
|
||||||
|
// Dedup pass 1: every row scored against every other row (within the
|
||||||
|
// same pool). The blocking strategy in `findClientMatches` keeps this
|
||||||
|
// cheap even for the full 252-row dataset.
|
||||||
|
const clusters = clusterByDedup(perRow, opts);
|
||||||
|
|
||||||
|
// Build the planned clients + interests from the clusters.
|
||||||
|
const clients: PlannedClient[] = [];
|
||||||
|
const interests: PlannedInterest[] = [];
|
||||||
|
const autoLinks: MigrationPlan['autoLinks'] = [];
|
||||||
|
const needsReview: MigrationPlan['needsReview'] = [];
|
||||||
|
|
||||||
|
for (const cluster of clusters) {
|
||||||
|
const lead = cluster.leadCandidate;
|
||||||
|
const tempId = `client-${lead.row.Id}`;
|
||||||
|
|
||||||
|
// Build the client record from the lead row, then merge in any
|
||||||
|
// contact info / address info from the other rows in the cluster.
|
||||||
|
const planned = buildPlannedClient(tempId, cluster, opts);
|
||||||
|
clients.push(planned);
|
||||||
|
|
||||||
|
// Each row in the cluster becomes its own interest record.
|
||||||
|
for (const member of cluster.members) {
|
||||||
|
const interest = buildPlannedInterest(member.row, tempId);
|
||||||
|
interests.push(interest);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cluster.members.length > 1) {
|
||||||
|
autoLinks.push({
|
||||||
|
leadSourceId: lead.row.Id,
|
||||||
|
mergedSourceIds: cluster.members.filter((m) => m !== lead).map((m) => m.row.Id),
|
||||||
|
score: cluster.maxScore,
|
||||||
|
reasons: cluster.reasons,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const pair of cluster.reviewPairs) {
|
||||||
|
needsReview.push(pair);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
clients,
|
||||||
|
interests,
|
||||||
|
flags,
|
||||||
|
autoLinks,
|
||||||
|
needsReview,
|
||||||
|
stats: {
|
||||||
|
inputInterestRows: snapshot.interests.length,
|
||||||
|
inputResidentialRows: snapshot.residentialInterests.length,
|
||||||
|
outputClients: clients.length,
|
||||||
|
outputInterests: interests.length,
|
||||||
|
outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0),
|
||||||
|
outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0),
|
||||||
|
flaggedRows: flags.length,
|
||||||
|
autoLinkedClusters: autoLinks.length,
|
||||||
|
needsReviewPairs: needsReview.length,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Helpers ────────────────────────────────────────────────────────────────

/**
 * One legacy source row plus every normalized view of it the pipeline
 * needs: the dedup `MatchCandidate`, parsed phone/country/email, and
 * the display name. Built once per row by `rowToCandidate`.
 */
interface RowCandidate {
  /** The verbatim NocoDB source row this candidate was derived from. */
  row: NocoDbRow;
  /** Normalized identity fields consumed by the dedup matcher. */
  candidate: MatchCandidate;
  /** Phone normalize result for the row's primary phone; used downstream
   * to attach valueE164 + country to the planned contact. */
  phoneResult: NormalizedPhone | null;
  /** Country resolved from "Place of Residence". */
  countryIso: CountryCode | null;
  /** How the country was resolved; null when it couldn't be resolved. */
  countryConfidence: 'exact' | 'fuzzy' | 'city' | null;
  /** Normalized email or null. */
  email: string | null;
  /** Display name from `normalizeName`. */
  displayName: string;
}
|
||||||
|
|
||||||
|
function rowToCandidate(
|
||||||
|
row: NocoDbRow,
|
||||||
|
sourceTable: MigrationFlag['sourceTable'],
|
||||||
|
opts: TransformOptions,
|
||||||
|
flags: MigrationFlag[],
|
||||||
|
): RowCandidate {
|
||||||
|
const rawName = (row['Full Name'] as string | undefined) ?? '';
|
||||||
|
const rawEmail = (row['Email Address'] as string | undefined) ?? '';
|
||||||
|
const rawPhone = (row['Phone Number'] as string | undefined) ?? '';
|
||||||
|
const rawCountry = (row['Place of Residence'] as string | undefined) ?? '';
|
||||||
|
|
||||||
|
const normName = normalizeName(rawName);
|
||||||
|
const email = normalizeEmail(rawEmail);
|
||||||
|
const country = resolveCountry(rawCountry);
|
||||||
|
const phoneCountry = country.iso ?? opts.defaultPhoneCountry;
|
||||||
|
const phoneResult = normalizePhone(rawPhone, phoneCountry as CountryCode);
|
||||||
|
|
||||||
|
// Surface anything weird so the report can show it.
|
||||||
|
if (rawPhone && !phoneResult?.e164) {
|
||||||
|
flags.push({
|
||||||
|
sourceTable,
|
||||||
|
sourceId: row.Id,
|
||||||
|
reason: phoneResult?.flagged ? `phone ${phoneResult.flagged}` : 'phone unparseable',
|
||||||
|
details: { rawPhone },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (rawEmail && !email) {
|
||||||
|
flags.push({
|
||||||
|
sourceTable,
|
||||||
|
sourceId: row.Id,
|
||||||
|
reason: 'email invalid',
|
||||||
|
details: { rawEmail },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (rawCountry && !country.iso) {
|
||||||
|
flags.push({
|
||||||
|
sourceTable,
|
||||||
|
sourceId: row.Id,
|
||||||
|
reason: 'country unresolved',
|
||||||
|
details: { rawCountry },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const candidate: MatchCandidate = {
|
||||||
|
id: String(row.Id),
|
||||||
|
fullName: normName.display || null,
|
||||||
|
surnameToken: normName.surnameToken ?? null,
|
||||||
|
emails: email ? [email] : [],
|
||||||
|
phonesE164: phoneResult?.e164 ? [phoneResult.e164] : [],
|
||||||
|
countryIso: country.iso ?? null,
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
row,
|
||||||
|
candidate,
|
||||||
|
phoneResult,
|
||||||
|
countryIso: country.iso ?? null,
|
||||||
|
countryConfidence: country.confidence,
|
||||||
|
email,
|
||||||
|
displayName: normName.display,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * A group of source rows the dedup pass decided belong to one client.
 * Singleton clusters (no duplicates found) are the common case.
 */
interface Cluster {
  /** The cluster's "lead" row (most complete + most recent). */
  leadCandidate: RowCandidate;
  /** Every row in the cluster, lead included. */
  members: RowCandidate[];
  /** Highest pair score that contributed to this cluster (0 for singletons). */
  maxScore: number;
  /** Match reasons accumulated across the cluster's auto-linked pairs. */
  reasons: string[];
  /** Pairs in this cluster that scored medium (need review). */
  reviewPairs: Array<{ aSourceId: number; bSourceId: number; score: number; reasons: string[] }>;
}
|
||||||
|
|
||||||
|
function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] {
|
||||||
|
// Use a union-find structure indexed by row id. Every pair with a
|
||||||
|
// score >= autoLink threshold gets unioned. Pairs in [needsReview,
|
||||||
|
// autoLink) accumulate onto the cluster's reviewPairs list — they're
|
||||||
|
// surfaced for human triage but not auto-merged.
|
||||||
|
const parent = new Map<string, string>();
|
||||||
|
for (const r of rows) parent.set(r.candidate.id, r.candidate.id);
|
||||||
|
const find = (id: string): string => {
|
||||||
|
let cur = id;
|
||||||
|
while (parent.get(cur) !== cur) {
|
||||||
|
const next = parent.get(cur)!;
|
||||||
|
parent.set(cur, parent.get(next)!); // path compression
|
||||||
|
cur = parent.get(cur)!;
|
||||||
|
}
|
||||||
|
return cur;
|
||||||
|
};
|
||||||
|
const union = (a: string, b: string) => {
|
||||||
|
const rootA = find(a);
|
||||||
|
const rootB = find(b);
|
||||||
|
if (rootA !== rootB) parent.set(rootA, rootB);
|
||||||
|
};
|
||||||
|
|
||||||
|
const clusterReasons = new Map<string, string[]>();
|
||||||
|
const clusterMaxScore = new Map<string, number>();
|
||||||
|
const clusterReviewPairs = new Map<string, Cluster['reviewPairs']>();
|
||||||
|
|
||||||
|
// Score every candidate against every other candidate. The find-matches
|
||||||
|
// function does its own blocking so this is cheap.
|
||||||
|
for (let i = 0; i < rows.length; i += 1) {
|
||||||
|
const left = rows[i]!;
|
||||||
|
const remainingPool = rows.slice(i + 1).map((r) => r.candidate);
|
||||||
|
if (remainingPool.length === 0) continue;
|
||||||
|
const matches = findClientMatches(left.candidate, remainingPool, {
|
||||||
|
highScore: opts.thresholds.autoLink,
|
||||||
|
mediumScore: opts.thresholds.needsReview,
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const m of matches) {
|
||||||
|
if (m.score >= opts.thresholds.autoLink) {
|
||||||
|
union(left.candidate.id, m.candidate.id);
|
||||||
|
const root = find(left.candidate.id);
|
||||||
|
clusterMaxScore.set(root, Math.max(clusterMaxScore.get(root) ?? 0, m.score));
|
||||||
|
const existing = clusterReasons.get(root) ?? [];
|
||||||
|
for (const reason of m.reasons) {
|
||||||
|
if (!existing.includes(reason)) existing.push(reason);
|
||||||
|
}
|
||||||
|
clusterReasons.set(root, existing);
|
||||||
|
} else if (m.score >= opts.thresholds.needsReview) {
|
||||||
|
// Medium — track on whichever cluster `left` belongs to.
|
||||||
|
const root = find(left.candidate.id);
|
||||||
|
const list = clusterReviewPairs.get(root) ?? [];
|
||||||
|
list.push({
|
||||||
|
aSourceId: parseInt(left.candidate.id, 10),
|
||||||
|
bSourceId: parseInt(m.candidate.id, 10),
|
||||||
|
score: m.score,
|
||||||
|
reasons: m.reasons,
|
||||||
|
});
|
||||||
|
clusterReviewPairs.set(root, list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group rows by their cluster root.
|
||||||
|
const byRoot = new Map<string, RowCandidate[]>();
|
||||||
|
for (const r of rows) {
|
||||||
|
const root = find(r.candidate.id);
|
||||||
|
const list = byRoot.get(root) ?? [];
|
||||||
|
list.push(r);
|
||||||
|
byRoot.set(root, list);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build cluster objects, choosing the most-complete row as the lead.
|
||||||
|
const clusters: Cluster[] = [];
|
||||||
|
for (const [root, members] of byRoot) {
|
||||||
|
const lead = pickLead(members);
|
||||||
|
clusters.push({
|
||||||
|
leadCandidate: lead,
|
||||||
|
members,
|
||||||
|
maxScore: clusterMaxScore.get(root) ?? 0,
|
||||||
|
reasons: clusterReasons.get(root) ?? [],
|
||||||
|
reviewPairs: clusterReviewPairs.get(root) ?? [],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
function pickLead(rows: RowCandidate[]): RowCandidate {
|
||||||
|
// Pick the row with the most populated fields, breaking ties by
|
||||||
|
// recency (highest Id, since NocoDB IDs are monotonic).
|
||||||
|
return rows.reduce((best, current) => {
|
||||||
|
const bestScore = completenessScore(best);
|
||||||
|
const currentScore = completenessScore(current);
|
||||||
|
if (currentScore > bestScore) return current;
|
||||||
|
if (currentScore === bestScore && current.row.Id > best.row.Id) return current;
|
||||||
|
return best;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function completenessScore(r: RowCandidate): number {
|
||||||
|
let score = 0;
|
||||||
|
if (r.email) score += 1;
|
||||||
|
if (r.phoneResult?.e164) score += 1;
|
||||||
|
if (r.row['Address']) score += 0.5;
|
||||||
|
if (r.row['Yacht Name']) score += 0.5;
|
||||||
|
if (r.row['Source']) score += 0.25;
|
||||||
|
if (r.row['Lead Category']) score += 0.25;
|
||||||
|
if (r.row['Internal Notes']) score += 0.25;
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Merge a dedup cluster into a single planned client record. The lead
 * row supplies name / country / address / source; every cluster member
 * contributes its distinct emails and phones as separate contact rows.
 */
function buildPlannedClient(
  tempId: string,
  cluster: Cluster,
  opts: TransformOptions,
): PlannedClient {
  const lead = cluster.leadCandidate;

  // Collect distinct emails + phones from across the cluster — duplicate
  // submissions often come with different contact methods we want to
  // preserve as multiple rows in `client_contacts`.
  const seenEmails = new Set<string>();
  const seenPhones = new Set<string>();
  const contacts: PlannedContact[] = [];

  for (const member of cluster.members) {
    if (member.email && !seenEmails.has(member.email)) {
      seenEmails.add(member.email);
      contacts.push({
        channel: 'email',
        value: member.email,
        // First contact overall gets isPrimary — only an email can be
        // first here because emails are pushed before phones per member.
        isPrimary: contacts.length === 0,
      });
    }
    if (member.phoneResult?.e164 && !seenPhones.has(member.phoneResult.e164)) {
      seenPhones.add(member.phoneResult.e164);
      const isFirstPhone = !contacts.some((c) => c.channel === 'phone');
      // NOTE(review): this marks the first phone primary even when an
      // email is already primary (the `every` clause passes when the
      // only existing primary is an email), so a cluster with both
      // channels ends up with TWO isPrimary contacts. Confirm whether
      // isPrimary is meant per-channel or one-per-client.
      contacts.push({
        channel: 'phone',
        value: member.phoneResult.e164,
        valueE164: member.phoneResult.e164,
        valueCountry: member.phoneResult.country,
        isPrimary: isFirstPhone && contacts.every((c) => !c.isPrimary || c.channel === 'email'),
        flagged: member.phoneResult.flagged,
      });
    }
  }

  // Demote the email-primary if a more-completable phone exists.
  // Simpler invariant: the first contact is primary unless the row
  // explicitly preferred phone.
  // NOTE(review): the demotion described above is not implemented —
  // `preferredMethod` only feeds the `preferredContactMethod` field of
  // the returned client and never reorders the isPrimary flags.
  const preferredMethod = (lead.row['Contact Method Preferred'] as string | undefined)
    ?.toLowerCase()
    ?.trim();

  // Address: only build if the lead row has a meaningful address text.
  const rawAddress = (lead.row['Address'] as string | undefined)?.trim();
  const addresses: PlannedAddress[] = [];
  if (rawAddress) {
    addresses.push({
      streetAddress: rawAddress,
      // Legacy "Address" is a single free-text blob; no city split here.
      city: null,
      countryIso: lead.countryIso ?? opts.defaultPhoneCountry,
      countryConfidence: lead.countryConfidence ?? 'fallback',
    });
  }

  // Unmapped legacy sources fall back to 'manual'; absent sources stay null.
  const sourceFromRow = (lead.row['Source'] as string | undefined) ?? null;
  const mappedSource = sourceFromRow ? (SOURCE_MAP[sourceFromRow] ?? 'manual') : null;

  return {
    tempId,
    sourceIds: cluster.members.map((m) => m.row.Id),
    fullName: lead.displayName,
    surnameToken: lead.candidate.surnameToken ?? undefined,
    countryIso: lead.countryIso,
    preferredContactMethod: preferredMethod ?? null,
    source: mappedSource,
    contacts,
    addresses,
  };
}
|
||||||
|
|
||||||
|
function buildPlannedInterest(row: NocoDbRow, clientTempId: string): PlannedInterest {
|
||||||
|
const stage = (row['Sales Process Level'] as string | undefined) ?? '';
|
||||||
|
const cat = (row['Lead Category'] as string | undefined) ?? '';
|
||||||
|
|
||||||
|
const notesParts: string[] = [];
|
||||||
|
const internalNotes = row['Internal Notes'] as string | undefined;
|
||||||
|
const extraComments = row['Extra Comments'] as string | undefined;
|
||||||
|
if (internalNotes?.trim()) notesParts.push(internalNotes.trim());
|
||||||
|
if (extraComments?.trim()) notesParts.push(`Extra Comments: ${extraComments.trim()}`);
|
||||||
|
const berthSize = row['Berth Size Desired'] as string | undefined;
|
||||||
|
if (berthSize?.trim()) notesParts.push(`Berth size desired: ${berthSize.trim()}`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
sourceId: row.Id,
|
||||||
|
clientTempId,
|
||||||
|
pipelineStage: STAGE_MAP[stage] ?? 'open',
|
||||||
|
leadCategory: LEAD_CATEGORY_MAP[cat] ?? null,
|
||||||
|
source: ((row['Source'] as string | undefined) ?? null) || null,
|
||||||
|
notes: notesParts.join('\n\n') || null,
|
||||||
|
berthMooringNumber: (row['Berth Number'] as string | undefined) ?? null,
|
||||||
|
yachtName: (() => {
|
||||||
|
const n = (row['Yacht Name'] as string | undefined)?.trim();
|
||||||
|
// Filter placeholder values used by sales reps for "we don't know yet".
|
||||||
|
if (!n) return null;
|
||||||
|
if (['TBC', 'Na', 'NA', 'na', 'N/A', 'TBD', 'tbd'].includes(n)) return null;
|
||||||
|
return n;
|
||||||
|
})(),
|
||||||
|
dateEoiSent: parseFlexibleDate(row['EOI Time Sent']),
|
||||||
|
dateEoiSigned: parseFlexibleDate(row['all_signed_notified_at'] ?? row['developerSignTime']),
|
||||||
|
dateDepositReceived: null, // not directly tracked in legacy schema
|
||||||
|
dateContractSent: parseFlexibleDate(row['Time LOI Sent']),
|
||||||
|
dateContractSigned: parseFlexibleDate(row['developerSignTime']),
|
||||||
|
dateLastContact: parseFlexibleDate(row['Created At'] ?? row['Date Added']),
|
||||||
|
documensoId: (row['documensoID'] as string | undefined) ?? null,
|
||||||
|
};
|
||||||
|
}
|
||||||
152
src/lib/dedup/nocodb-source.ts
Normal file
152
src/lib/dedup/nocodb-source.ts
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
/**
|
||||||
|
* Read-only adapter for the legacy NocoDB Port Nimara base.
|
||||||
|
*
|
||||||
|
* Used by the one-shot migration script (`scripts/migrate-from-nocodb.ts`)
|
||||||
|
* to pull every Interest, Residential Interest, and Website Submission
|
||||||
|
* row from the source-of-truth NocoDB tables. No mutations.
|
||||||
|
*
|
||||||
|
* Auth: `xc-token` header per NocoDB v2 API.
|
||||||
|
*
|
||||||
|
* The shape returned is a verbatim record of the row's fields — caller
|
||||||
|
* is responsible for mapping to the new schema via `nocodb-transform.ts`.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
// ─── Configuration ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const ConfigSchema = z.object({
|
||||||
|
url: z.string().url(),
|
||||||
|
token: z.string().min(1),
|
||||||
|
});
|
||||||
|
|
||||||
|
export interface NocoDbConfig {
|
||||||
|
url: string;
|
||||||
|
token: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function loadNocoDbConfig(env: NodeJS.ProcessEnv = process.env): NocoDbConfig {
|
||||||
|
return ConfigSchema.parse({
|
||||||
|
url: env.NOCODB_URL,
|
||||||
|
token: env.NOCODB_TOKEN,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Table identifiers ──────────────────────────────────────────────────────
//
// These IDs are stable per the NocoDB base — they were captured during the
// 2026-05-03 audit and won't change unless the base is rebuilt. If the
// base is reset, regenerate them from `getTablesList`.

/** Opaque NocoDB v2 table ids, keyed by the domain name used in code. */
export const NOCO_TABLES = {
  interests: 'mbs9hjauug4eseo',
  residentialInterests: 'mscfpwwwjuds4nt',
  websiteInterestSubmissions: 'mevkpcih67c6jsm',
  websiteContactFormSubmissions: 'mxk5cd0pmwnwlcl',
  websiteBerthEoiSupplements: 'mglmioo0ku8zgqj',
  berths: 'mczgos9hr3oa9qc',
} as const;
|
||||||
|
|
||||||
|
// ─── HTTP shape ─────────────────────────────────────────────────────────────

/** Envelope returned by NocoDB v2 list endpoints (`/records`). */
interface NocoDbListResponse<T> {
  /** The page of rows. */
  list: T[];
  /** Pagination metadata; `isLastPage` drives the fetch loop. */
  pageInfo: {
    totalRows: number;
    page: number;
    pageSize: number;
    isFirstPage: boolean;
    isLastPage: boolean;
  };
}

/** A row's `Id` is always present. The rest of the fields vary per table. */
export type NocoDbRow = Record<string, unknown> & { Id: number };
|
||||||
|
|
||||||
|
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch all rows from a NocoDB table. Auto-paginates until the API
|
||||||
|
* reports `isLastPage`. The legacy base is small (252 Interests rows
|
||||||
|
* being the largest table) so we keep this simple — no streaming.
|
||||||
|
*/
|
||||||
|
export async function fetchAllRows(
|
||||||
|
tableId: string,
|
||||||
|
config: NocoDbConfig,
|
||||||
|
pageSize = 250,
|
||||||
|
): Promise<NocoDbRow[]> {
|
||||||
|
const all: NocoDbRow[] = [];
|
||||||
|
let page = 1;
|
||||||
|
// Hard cap to prevent infinite-loop bugs if pageInfo lies. Each page
|
||||||
|
// pulls up to `pageSize` rows, so 200 pages * 250 = 50k rows is the
|
||||||
|
// maximum we'll ever fetch from one table.
|
||||||
|
const MAX_PAGES = 200;
|
||||||
|
|
||||||
|
while (page <= MAX_PAGES) {
|
||||||
|
const url = new URL(`${config.url}/api/v2/tables/${tableId}/records`);
|
||||||
|
url.searchParams.set('limit', String(pageSize));
|
||||||
|
url.searchParams.set('offset', String((page - 1) * pageSize));
|
||||||
|
|
||||||
|
const res = await fetch(url, {
|
||||||
|
headers: {
|
||||||
|
'xc-token': config.token,
|
||||||
|
accept: 'application/json',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(
|
||||||
|
`NocoDB fetch failed: ${res.status} ${res.statusText} — table ${tableId} page ${page}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const json = (await res.json()) as NocoDbListResponse<NocoDbRow>;
|
||||||
|
all.push(...json.list);
|
||||||
|
|
||||||
|
if (json.pageInfo.isLastPage || json.list.length === 0) break;
|
||||||
|
page += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return all;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Convenience snapshot — pulls every table the migration cares about
 * in parallel. Returned shape is the input the transform layer expects.
 */
export interface NocoDbSnapshot {
  interests: NocoDbRow[];
  residentialInterests: NocoDbRow[];
  websiteInterestSubmissions: NocoDbRow[];
  websiteContactFormSubmissions: NocoDbRow[];
  websiteBerthEoiSupplements: NocoDbRow[];
  berths: NocoDbRow[];
  /** ISO-8601 timestamp recorded when the snapshot was assembled. */
  fetchedAt: string;
}
|
||||||
|
|
||||||
|
export async function fetchSnapshot(config: NocoDbConfig): Promise<NocoDbSnapshot> {
|
||||||
|
const [
|
||||||
|
interests,
|
||||||
|
residentialInterests,
|
||||||
|
websiteInterestSubmissions,
|
||||||
|
websiteContactFormSubmissions,
|
||||||
|
websiteBerthEoiSupplements,
|
||||||
|
berths,
|
||||||
|
] = await Promise.all([
|
||||||
|
fetchAllRows(NOCO_TABLES.interests, config),
|
||||||
|
fetchAllRows(NOCO_TABLES.residentialInterests, config),
|
||||||
|
fetchAllRows(NOCO_TABLES.websiteInterestSubmissions, config),
|
||||||
|
fetchAllRows(NOCO_TABLES.websiteContactFormSubmissions, config),
|
||||||
|
fetchAllRows(NOCO_TABLES.websiteBerthEoiSupplements, config),
|
||||||
|
fetchAllRows(NOCO_TABLES.berths, config),
|
||||||
|
]);
|
||||||
|
|
||||||
|
return {
|
||||||
|
interests,
|
||||||
|
residentialInterests,
|
||||||
|
websiteInterestSubmissions,
|
||||||
|
websiteContactFormSubmissions,
|
||||||
|
websiteBerthEoiSupplements,
|
||||||
|
berths,
|
||||||
|
fetchedAt: new Date().toISOString(),
|
||||||
|
};
|
||||||
|
}
|
||||||
418
src/lib/dedup/normalize.ts
Normal file
418
src/lib/dedup/normalize.ts
Normal file
@@ -0,0 +1,418 @@
|
|||||||
|
/**
|
||||||
|
* Normalization helpers for the dedup pipeline.
|
||||||
|
*
|
||||||
|
* Pure functions (no DB, no React). Used by both the runtime at-create
|
||||||
|
* surfaces and the one-shot NocoDB migration script. Every transform
|
||||||
|
* here has a fixture in `tests/unit/dedup/normalize.test.ts` drawn from
|
||||||
|
* real dirty values observed in the legacy NocoDB Interests table.
|
||||||
|
*
|
||||||
|
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §3.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
import { ALL_COUNTRY_CODES, getCountryName, type CountryCode } from '@/lib/i18n/countries';
|
||||||
|
import { parsePhoneScriptSafe as parsePhone } from './phone-parse';
|
||||||
|
|
||||||
|
// ─── Names ──────────────────────────────────────────────────────────────────

/**
 * Tokens that should stay lowercase mid-name. Covers the common Romance,
 * Germanic, and Iberian particles seen in client records. The first token
 * of a name is always title-cased even if it appears in this set.
 *
 * Entries must be lowercase: lookups are done against lowercased tokens
 * (see `titleCaseOneToken` / `computeSurnameToken`).
 */
const PARTICLES: ReadonlySet<string> = new Set([
  'van',
  'von',
  'de',
  'del',
  'da',
  'das',
  'do',
  'dos',
  'di',
  'le',
  'la',
  'el',
  'al',
  'der',
  'den',
  'des',
  'du',
  'dalla',
  'della',
  'st',
  'st.',
  'y',
]);
|
||||||
|
|
||||||
|
export interface NormalizedName {
  /** Human-readable form preserved for UI display. Trims, collapses
   * whitespace, fixes case, but never destroys the user's intent —
   * slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC")
   * is left intact. */
  display: string;
  /** Lowercased form for matching (exactly `display.toLowerCase()`). */
  normalized: string;
  /** Last non-particle token, lowercased. Used as a blocking key by the
   * dedup algorithm so we only compare candidates with similar surnames.
   * Undefined when the input name is empty. */
  surnameToken?: string;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a free-text full name. Trims and collapses whitespace,
|
||||||
|
* replaces \r/\n/\t with single spaces, intelligently title-cases
|
||||||
|
* ALL-CAPS surnames while keeping particles (van / de / dalla / etc.)
|
||||||
|
* lowercase mid-name, and preserves Irish O' surnames as O'Brien.
|
||||||
|
*
|
||||||
|
* If the input contains a `/` (slash-with-company structure like
|
||||||
|
* "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is
|
||||||
|
* preserved verbatim — it's signal, not noise.
|
||||||
|
*/
|
||||||
|
export function normalizeName(raw: string | null | undefined): NormalizedName {
|
||||||
|
const safe = (raw ?? '').toString();
|
||||||
|
// Replace \r, \n, \t with single spaces, then collapse runs of whitespace.
|
||||||
|
const cleaned = safe
|
||||||
|
.replace(/[\r\n\t]/g, ' ')
|
||||||
|
.replace(/\s+/g, ' ')
|
||||||
|
.trim();
|
||||||
|
|
||||||
|
if (!cleaned) {
|
||||||
|
return { display: '', normalized: '', surnameToken: undefined };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slash-with-company: title-case the part before the slash, leave the
|
||||||
|
// company segment untouched (it's typically already a brand we shouldn't
|
||||||
|
// mangle: "SAS TIKI", "7 Knots, LLC").
|
||||||
|
const slashIdx = cleaned.indexOf('/');
|
||||||
|
let displayCore: string;
|
||||||
|
if (slashIdx !== -1) {
|
||||||
|
const personPart = cleaned.slice(0, slashIdx).trim();
|
||||||
|
const companyPart = cleaned.slice(slashIdx + 1).trim();
|
||||||
|
displayCore = `${titleCaseTokens(personPart)} / ${companyPart}`;
|
||||||
|
} else {
|
||||||
|
displayCore = titleCaseTokens(cleaned);
|
||||||
|
}
|
||||||
|
|
||||||
|
const display = displayCore;
|
||||||
|
const normalized = display.toLowerCase();
|
||||||
|
const surnameToken = computeSurnameToken(slashIdx !== -1 ? cleaned.slice(0, slashIdx) : cleaned);
|
||||||
|
|
||||||
|
return { display, normalized, surnameToken };
|
||||||
|
}
|
||||||
|
|
||||||
|
function titleCaseTokens(s: string): string {
|
||||||
|
const tokens = s.split(' ').filter(Boolean);
|
||||||
|
if (tokens.length === 0) return '';
|
||||||
|
return tokens.map((tok, idx) => titleCaseOneToken(tok, idx === 0)).join(' ');
|
||||||
|
}
|
||||||
|
|
||||||
|
function titleCaseOneToken(token: string, isFirst: boolean): string {
|
||||||
|
if (!token) return '';
|
||||||
|
const lower = token.toLowerCase();
|
||||||
|
if (!isFirst && PARTICLES.has(lower)) return lower;
|
||||||
|
// O'Brien / D'Angelo / l'Estrange — capitalize the segment after each
|
||||||
|
// apostrophe so a lowercased input round-trips to readable Irish caps.
|
||||||
|
if (lower.includes("'")) {
|
||||||
|
return lower
|
||||||
|
.split("'")
|
||||||
|
.map((part) => (part.length > 0 ? part[0]!.toUpperCase() + part.slice(1) : part))
|
||||||
|
.join("'");
|
||||||
|
}
|
||||||
|
return lower[0]!.toUpperCase() + lower.slice(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
function computeSurnameToken(personPart: string): string | undefined {
|
||||||
|
const cleaned = personPart
|
||||||
|
.replace(/[\r\n\t]/g, ' ')
|
||||||
|
.replace(/\s+/g, ' ')
|
||||||
|
.trim();
|
||||||
|
if (!cleaned) return undefined;
|
||||||
|
const tokens = cleaned.split(' ').map((t) => t.toLowerCase());
|
||||||
|
// Walk from the right past particles to find the last "real" surname token.
|
||||||
|
for (let i = tokens.length - 1; i >= 0; i -= 1) {
|
||||||
|
const tok = tokens[i]!;
|
||||||
|
if (!PARTICLES.has(tok)) return tok;
|
||||||
|
}
|
||||||
|
// All tokens are particles? Fall back to the last token verbatim.
|
||||||
|
return tokens[tokens.length - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Emails ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const emailSchema = z.string().email();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a free-text email. Trims + lowercases. Returns null for empty
|
||||||
|
* or malformed input — caller decides whether to flag, store, or drop.
|
||||||
|
*
|
||||||
|
* Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real
|
||||||
|
* distinct addresses, and stripping them would auto-merge legitimately
|
||||||
|
* separate accounts.
|
||||||
|
*/
|
||||||
|
export function normalizeEmail(raw: string | null | undefined): string | null {
|
||||||
|
if (raw == null) return null;
|
||||||
|
const trimmed = raw.toString().trim().toLowerCase();
|
||||||
|
if (!trimmed) return null;
|
||||||
|
const result = emailSchema.safeParse(trimmed);
|
||||||
|
return result.success ? trimmed : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Phones ─────────────────────────────────────────────────────────────────

/** Quirk categories surfaced by `normalizePhone` — see its pipeline doc. */
export type PhoneFlag = 'multi_number' | 'placeholder' | 'unparseable';

export interface NormalizedPhone {
  /** Canonical E.164 form, e.g. '+15742740548'. Null when unparseable
   * or flagged as placeholder. */
  e164: string | null;
  /** ISO-3166-1 alpha-2 of the country the number was parsed against. */
  country: CountryCode | null;
  /** Display-friendly international format. Useful for migration reports. */
  display: string | null;
  /** Set when the input had a quirk worth surfacing in the migration
   * report or runtime audit log. Absent on clean parses. */
  flagged?: PhoneFlag;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a raw user-entered phone string for comparison + storage.
|
||||||
|
*
|
||||||
|
* Pipeline:
|
||||||
|
* 1. strip leading apostrophe (spreadsheet copy-paste artifact)
|
||||||
|
* 2. strip \r / \n / \t (real values seen in NocoDB had carriage returns)
|
||||||
|
* 3. detect multi-number fields ("+33611111111;+33622222222",
|
||||||
|
* "0677580750/0690511494") — flag and take first segment
|
||||||
|
* 4. strip whitespace, dots, dashes, parens, single quotes
|
||||||
|
* 5. convert leading "00" → "+" (international dialling code)
|
||||||
|
* 6. detect placeholder fakes (8+ consecutive zeros) — flag, return null e164
|
||||||
|
* 7. parse via libphonenumber-js
|
||||||
|
* 8. on parse failure or invalid number → flag 'unparseable'
|
||||||
|
*
|
||||||
|
* Returns null for empty inputs (cheaper to short-circuit than to wrap).
|
||||||
|
*/
|
||||||
|
export function normalizePhone(
|
||||||
|
raw: string | null | undefined,
|
||||||
|
defaultCountry?: CountryCode,
|
||||||
|
): NormalizedPhone | null {
|
||||||
|
if (raw == null) return null;
|
||||||
|
let cleaned = raw.toString().trim();
|
||||||
|
if (!cleaned) return null;
|
||||||
|
|
||||||
|
// 1. Spreadsheet apostrophe prefix.
|
||||||
|
if (cleaned.startsWith("'")) cleaned = cleaned.slice(1);
|
||||||
|
|
||||||
|
// 2. Strip carriage returns / newlines / tabs.
|
||||||
|
cleaned = cleaned.replace(/[\r\n\t]/g, '');
|
||||||
|
|
||||||
|
// 3. Multi-number detection — split on /, ;, , (in that order of priority).
|
||||||
|
let flagged: PhoneFlag | undefined;
|
||||||
|
if (/[/;,]/.test(cleaned)) {
|
||||||
|
flagged = 'multi_number';
|
||||||
|
cleaned = cleaned.split(/[/;,]/)[0]!.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Strip whitespace, dots, dashes, parens. Keep + for E.164 prefix.
|
||||||
|
cleaned = cleaned.replace(/[\s.\-()]/g, '');
|
||||||
|
if (!cleaned) return { e164: null, country: null, display: null, flagged: 'unparseable' };
|
||||||
|
|
||||||
|
// 5. 00 international prefix → +.
|
||||||
|
if (cleaned.startsWith('00')) {
|
||||||
|
cleaned = '+' + cleaned.slice(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Placeholder fakes — runs of 8+ consecutive zeros, e.g. +447000000000.
|
||||||
|
if (/0{8,}/.test(cleaned)) {
|
||||||
|
return { e164: null, country: null, display: null, flagged: 'placeholder' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7. Parse via the existing i18n helper (libphonenumber-js under the hood).
|
||||||
|
const parsed = parsePhone(cleaned, defaultCountry);
|
||||||
|
if (!parsed.e164) {
|
||||||
|
// Couldn't even produce a canonical form — genuinely garbage.
|
||||||
|
return { e164: null, country: null, display: null, flagged: 'unparseable' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: we deliberately don't gate on `parsed.isValid`. The
|
||||||
|
// libphonenumber-js `min` build returns isValid=false for many real
|
||||||
|
// numbers (NANP territories share +1; some country metadata is
|
||||||
|
// truncated). For dedup we only need a canonical E.164 string to
|
||||||
|
// compare; strict validity is the form layer's problem, not ours.
|
||||||
|
// If a string-only test (e.g. \"abc-not-a-phone\") gets here, parse
|
||||||
|
// returns null e164 anyway and the branch above handles it.
|
||||||
|
return {
|
||||||
|
e164: parsed.e164,
|
||||||
|
country: parsed.country,
|
||||||
|
display: parsed.international,
|
||||||
|
flagged,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Countries ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aliases for canonical country names that don't match
|
||||||
|
* `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
|
||||||
|
* (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
|
||||||
|
*
|
||||||
|
* Kept opinionated and small — only entries we've actually seen in legacy
|
||||||
|
* data. Adding a new alias is cheap; trying to be exhaustive isn't.
|
||||||
|
*/
|
||||||
|
const COUNTRY_ALIASES: Record<string, CountryCode> = {
|
||||||
|
// Generic abbreviations
|
||||||
|
usa: 'US',
|
||||||
|
us: 'US',
|
||||||
|
uk: 'GB',
|
||||||
|
// Saint-Barthélemy variants seen in production
|
||||||
|
'saint barthelemy': 'BL',
|
||||||
|
'saint barth': 'BL',
|
||||||
|
'st barth': 'BL',
|
||||||
|
'st barths': 'BL',
|
||||||
|
'st barthelemy': 'BL',
|
||||||
|
// Caribbean short-forms whose canonical Intl names are awkward
|
||||||
|
// ("Antigua and Barbuda", "Saint Vincent and the Grenadines", etc.).
|
||||||
|
antigua: 'AG',
|
||||||
|
barbuda: 'AG',
|
||||||
|
'st kitts': 'KN',
|
||||||
|
'saint kitts': 'KN',
|
||||||
|
nevis: 'KN',
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* High-frequency cities → country, used as a last-resort fallback when
|
||||||
|
* exact / alias / fuzzy country matching all miss. Keys are normalized.
|
||||||
|
*
|
||||||
|
* Order matters: an entry's key is also matched as a substring of the
|
||||||
|
* input ("Sag Harbor Y" contains "sag harbor"), so the most specific
|
||||||
|
* city appears first to avoid a wrong partial hit.
|
||||||
|
*/
|
||||||
|
const CITY_TO_COUNTRY: Record<string, CountryCode> = {
|
||||||
|
'kansas city': 'US',
|
||||||
|
'sag harbor': 'US',
|
||||||
|
'new york': 'US',
|
||||||
|
// Cities that came out unresolved from the 2026-05-03 NocoDB dry-run.
|
||||||
|
// Using lowercase (post-normalize keys).
|
||||||
|
boston: 'US',
|
||||||
|
tampa: 'US',
|
||||||
|
'fort lauderdale': 'US',
|
||||||
|
'port jefferson': 'US',
|
||||||
|
nantucket: 'US',
|
||||||
|
// US state abbreviations that often appear standalone or as suffix:
|
||||||
|
' fl': 'US',
|
||||||
|
' ma': 'US',
|
||||||
|
' ny': 'US',
|
||||||
|
' tx': 'US',
|
||||||
|
' ca': 'US',
|
||||||
|
// International
|
||||||
|
london: 'GB',
|
||||||
|
paris: 'FR',
|
||||||
|
};
|
||||||
|
|
||||||
|
export type CountryConfidence = 'exact' | 'fuzzy' | 'city';
|
||||||
|
|
||||||
|
export interface ResolvedCountry {
|
||||||
|
iso: CountryCode | null;
|
||||||
|
confidence: CountryConfidence | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map free-text country / region input to an ISO-3166-1 alpha-2 code.
|
||||||
|
*
|
||||||
|
* Lookup order: alias → exact (vs. all locale country names) → city →
|
||||||
|
* fuzzy (Levenshtein ≤ 2). Anything beyond fuzzy returns null and the
|
||||||
|
* migration script flags the row for human review.
|
||||||
|
*/
|
||||||
|
export function resolveCountry(text: string | null | undefined): ResolvedCountry {
|
||||||
|
if (text == null) return { iso: null, confidence: null };
|
||||||
|
const normalized = normalizeForLookup(text.toString());
|
||||||
|
if (!normalized) return { iso: null, confidence: null };
|
||||||
|
|
||||||
|
// 1. Aliases — covers USA / UK / St Barth and friends.
|
||||||
|
const alias = COUNTRY_ALIASES[normalized];
|
||||||
|
if (alias) return { iso: alias, confidence: 'exact' };
|
||||||
|
|
||||||
|
// 2. Exact match against Intl-derived country names. We compare against
|
||||||
|
// diacritic-stripped + lowercased canonical names so 'United States'
|
||||||
|
// and 'united states' both resolve.
|
||||||
|
for (const code of ALL_COUNTRY_CODES) {
|
||||||
|
const cleanName = normalizeForLookup(getCountryName(code, 'en'));
|
||||||
|
if (cleanName === normalized) return { iso: code, confidence: 'exact' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. City → country fallback, exact or substring.
|
||||||
|
const cityExact = CITY_TO_COUNTRY[normalized];
|
||||||
|
if (cityExact) return { iso: cityExact, confidence: 'city' };
|
||||||
|
for (const [city, iso] of Object.entries(CITY_TO_COUNTRY)) {
|
||||||
|
if (normalized.includes(city)) return { iso, confidence: 'city' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because
|
||||||
|
// a 4-char string like "Mars" sits within distance 2 of multiple
|
||||||
|
// short country names (Mali, Laos, Iran, …) — false-positive city.
|
||||||
|
if (normalized.length >= 6) {
|
||||||
|
let bestCode: CountryCode | null = null;
|
||||||
|
let bestDistance = Number.POSITIVE_INFINITY;
|
||||||
|
for (const code of ALL_COUNTRY_CODES) {
|
||||||
|
const cleanName = normalizeForLookup(getCountryName(code, 'en'));
|
||||||
|
const d = levenshtein(cleanName, normalized);
|
||||||
|
if (d < bestDistance) {
|
||||||
|
bestDistance = d;
|
||||||
|
bestCode = code;
|
||||||
|
if (d === 0) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bestDistance <= 2 && bestCode) {
|
||||||
|
return { iso: bestCode, confidence: 'fuzzy' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { iso: null, confidence: null };
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Lowercase + strip diacritics + replace hyphens/dots with spaces +
|
||||||
|
* collapse whitespace. Used by both the input and the canonical-name
|
||||||
|
* side of the country comparison so they meet on the same shape. */
|
||||||
|
function normalizeForLookup(s: string): string {
|
||||||
|
return s
|
||||||
|
.normalize('NFD')
|
||||||
|
.replace(/[̀-ͯ]/g, '')
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/[-.]/g, ' ')
|
||||||
|
.replace(/\s+/g, ' ')
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Levenshtein ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Standard iterative Levenshtein. Used by the country fuzzy match and by
|
||||||
|
* the dedup algorithm's name-similarity rule. Allocates O(n*m) so callers
|
||||||
|
* shouldn't run it against pathological inputs — the dedup blocking
|
||||||
|
* strategy keeps comparison sets small.
|
||||||
|
*
|
||||||
|
* Exported so the find-matches module can reuse the same implementation
|
||||||
|
* without relying on an external dep.
|
||||||
|
*/
|
||||||
|
export function levenshtein(a: string, b: string): number {
|
||||||
|
if (a === b) return 0;
|
||||||
|
if (!a) return b.length;
|
||||||
|
if (!b) return a.length;
|
||||||
|
|
||||||
|
const m = a.length;
|
||||||
|
const n = b.length;
|
||||||
|
// Two rolling rows is enough — keeps memory at O(n) instead of O(n*m).
|
||||||
|
let prev = new Array<number>(n + 1);
|
||||||
|
let curr = new Array<number>(n + 1);
|
||||||
|
for (let j = 0; j <= n; j += 1) prev[j] = j;
|
||||||
|
|
||||||
|
for (let i = 1; i <= m; i += 1) {
|
||||||
|
curr[0] = i;
|
||||||
|
for (let j = 1; j <= n; j += 1) {
|
||||||
|
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
|
||||||
|
curr[j] = Math.min(curr[j - 1]! + 1, prev[j]! + 1, prev[j - 1]! + cost);
|
||||||
|
}
|
||||||
|
[prev, curr] = [curr, prev];
|
||||||
|
}
|
||||||
|
|
||||||
|
return prev[n]!;
|
||||||
|
}
|
||||||
66
src/lib/dedup/phone-parse.ts
Normal file
66
src/lib/dedup/phone-parse.ts
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/**
|
||||||
|
* Script-safe phone parser.
|
||||||
|
*
|
||||||
|
* The project's existing `src/lib/i18n/phone.ts` imports from
|
||||||
|
* `libphonenumber-js`, which under Node 25 + tsx loader hits a
|
||||||
|
* metadata-shape interop bug (`{ default }` wrapping the JSON). It
|
||||||
|
* works fine in Next.js + vitest, but a `tsx scripts/...` invocation
|
||||||
|
* blows up.
|
||||||
|
*
|
||||||
|
* This wrapper bypasses the bundled `index.cjs.js` and calls
|
||||||
|
* `libphonenumber-js/core` directly with metadata loaded as raw JSON.
|
||||||
|
* Same surface as the i18n helper; usable from both runtimes.
|
||||||
|
*
|
||||||
|
* Used by the dedup library's `normalizePhone`. The runtime UI still
|
||||||
|
* imports `i18n/phone` directly — no reason to touch a working path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||||
|
const core: typeof import('libphonenumber-js/core') = require('libphonenumber-js/core');
|
||||||
|
// Load the JSON directly. The bundled `index.cjs.js` does the same
|
||||||
|
// thing but its `require('../metadata.min.json')` hits a Node 25 ESM
|
||||||
|
// interop bug that wraps the JSON in `{ default }`. Importing the
|
||||||
|
// JSON file by absolute path through the package root sidesteps it.
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-explicit-any
|
||||||
|
const metadata: any = require('libphonenumber-js/metadata.min.json');
|
||||||
|
|
||||||
|
import type { CountryCode } from '@/lib/i18n/countries';
|
||||||
|
|
||||||
|
export interface ParsedPhone {
|
||||||
|
e164: string | null;
|
||||||
|
country: CountryCode | null;
|
||||||
|
national: string | null;
|
||||||
|
international: string | null;
|
||||||
|
isValid: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
const EMPTY: ParsedPhone = {
|
||||||
|
e164: null,
|
||||||
|
country: null,
|
||||||
|
national: null,
|
||||||
|
international: null,
|
||||||
|
isValid: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
export function parsePhoneScriptSafe(raw: string, defaultCountry?: CountryCode): ParsedPhone {
|
||||||
|
const trimmed = raw.trim();
|
||||||
|
if (!trimmed) return EMPTY;
|
||||||
|
try {
|
||||||
|
// The core entry expects its own `CountryCode` type from
|
||||||
|
// libphonenumber-js. Our `CountryCode` type is the same set of ISO
|
||||||
|
// alpha-2 codes (we re-derive from the same Intl source) so this
|
||||||
|
// cast is structural-equivalent, not lossy.
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const parsed = core.parsePhoneNumberFromString(trimmed, defaultCountry as any, metadata);
|
||||||
|
if (!parsed) return EMPTY;
|
||||||
|
return {
|
||||||
|
e164: parsed.number,
|
||||||
|
country: (parsed.country ?? null) as CountryCode | null,
|
||||||
|
national: parsed.formatNational(),
|
||||||
|
international: parsed.formatInternational(),
|
||||||
|
isValid: parsed.isValid(),
|
||||||
|
};
|
||||||
|
} catch {
|
||||||
|
return EMPTY;
|
||||||
|
}
|
||||||
|
}
|
||||||
393
src/lib/services/client-merge.service.ts
Normal file
393
src/lib/services/client-merge.service.ts
Normal file
@@ -0,0 +1,393 @@
|
|||||||
|
/**
|
||||||
|
* Client merge service — atomically combines two client records.
|
||||||
|
*
|
||||||
|
* Used by:
|
||||||
|
* - /admin/duplicates review queue (when an admin confirms a merge)
|
||||||
|
* - the at-create suggestion path ("use existing client") — though
|
||||||
|
* that path uses the lighter `attachInterestToClient` and never
|
||||||
|
* actually merges two pre-existing clients
|
||||||
|
* - the migration script's `--apply` (eventually)
|
||||||
|
*
|
||||||
|
* Reversibility: every merge writes a `client_merge_log` row containing
|
||||||
|
* the loser's full pre-merge state. Within the configured undo window
|
||||||
|
* (default 7 days, see `dedup_undo_window_days` in system_settings) a
|
||||||
|
* follow-up `unmergeClients` call can restore the loser and detach
|
||||||
|
* everything that was reattached.
|
||||||
|
*
|
||||||
|
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §6.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { and, eq, sql } from 'drizzle-orm';
|
||||||
|
|
||||||
|
import { db } from '@/lib/db';
|
||||||
|
import {
|
||||||
|
clients,
|
||||||
|
clientContacts,
|
||||||
|
clientAddresses,
|
||||||
|
clientNotes,
|
||||||
|
clientTags,
|
||||||
|
clientRelationships,
|
||||||
|
clientMergeLog,
|
||||||
|
clientMergeCandidates,
|
||||||
|
} from '@/lib/db/schema/clients';
|
||||||
|
import { interests } from '@/lib/db/schema/interests';
|
||||||
|
import { berthReservations } from '@/lib/db/schema/reservations';
|
||||||
|
import { auditLogs } from '@/lib/db/schema/system';
|
||||||
|
|
||||||
|
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface MergeFieldChoices {
|
||||||
|
/** Per-field overrides — `winner` keeps the surviving client's value;
|
||||||
|
* `loser` copies the loser's value over. Fields not listed default
|
||||||
|
* to `winner` (no change). */
|
||||||
|
fullName?: 'winner' | 'loser';
|
||||||
|
nationalityIso?: 'winner' | 'loser';
|
||||||
|
preferredContactMethod?: 'winner' | 'loser';
|
||||||
|
preferredLanguage?: 'winner' | 'loser';
|
||||||
|
timezone?: 'winner' | 'loser';
|
||||||
|
source?: 'winner' | 'loser';
|
||||||
|
sourceDetails?: 'winner' | 'loser';
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface MergeOptions {
|
||||||
|
winnerId: string;
|
||||||
|
loserId: string;
|
||||||
|
/** ID of the user performing the merge (for audit + clientMergeLog.mergedBy). */
|
||||||
|
mergedBy: string;
|
||||||
|
/** Per-field choice overrides. Multi-value fields (contacts, addresses,
|
||||||
|
* notes, tags) are always preserved from both sides; this only
|
||||||
|
* affects single-value scalar fields on the `clients` row. */
|
||||||
|
fieldChoices?: MergeFieldChoices;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface MergeResult {
|
||||||
|
mergeLogId: string;
|
||||||
|
movedRows: {
|
||||||
|
interests: number;
|
||||||
|
contacts: number;
|
||||||
|
addresses: number;
|
||||||
|
notes: number;
|
||||||
|
tags: number;
|
||||||
|
relationships: number;
|
||||||
|
reservations: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically merge `loserId` into `winnerId`. Throws if:
|
||||||
|
* - either id doesn't exist or belongs to a different port
|
||||||
|
* - the loser has already been merged (mergedIntoClientId set)
|
||||||
|
* - the winner is itself archived
|
||||||
|
*/
|
||||||
|
export async function mergeClients(opts: MergeOptions): Promise<MergeResult> {
|
||||||
|
if (opts.winnerId === opts.loserId) {
|
||||||
|
throw new Error('Cannot merge a client into itself');
|
||||||
|
}
|
||||||
|
|
||||||
|
return await db.transaction(async (tx) => {
|
||||||
|
// ── Lock both rows for the duration. The first FOR UPDATE that
|
||||||
|
// arrives wins; a concurrent second merge of the same loser
|
||||||
|
// will see `mergedIntoClientId` set and bail. ──────────────────────
|
||||||
|
const [winnerRow] = await tx
|
||||||
|
.select()
|
||||||
|
.from(clients)
|
||||||
|
.where(eq(clients.id, opts.winnerId))
|
||||||
|
.for('update');
|
||||||
|
const [loserRow] = await tx
|
||||||
|
.select()
|
||||||
|
.from(clients)
|
||||||
|
.where(eq(clients.id, opts.loserId))
|
||||||
|
.for('update');
|
||||||
|
|
||||||
|
if (!winnerRow) throw new Error(`Winner client ${opts.winnerId} not found`);
|
||||||
|
if (!loserRow) throw new Error(`Loser client ${opts.loserId} not found`);
|
||||||
|
if (winnerRow.portId !== loserRow.portId) {
|
||||||
|
throw new Error('Cannot merge clients across different ports');
|
||||||
|
}
|
||||||
|
if (loserRow.mergedIntoClientId) {
|
||||||
|
throw new Error(`Loser ${opts.loserId} already merged into ${loserRow.mergedIntoClientId}`);
|
||||||
|
}
|
||||||
|
if (winnerRow.archivedAt) {
|
||||||
|
throw new Error('Cannot merge into an archived client');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Snapshot the loser's full state before any mutation. Used by
|
||||||
|
// `unmergeClients` to restore within the undo window. ──────────────
|
||||||
|
const loserContacts = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientContacts)
|
||||||
|
.where(eq(clientContacts.clientId, opts.loserId));
|
||||||
|
const loserAddresses = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientAddresses)
|
||||||
|
.where(eq(clientAddresses.clientId, opts.loserId));
|
||||||
|
const loserNotes = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientNotes)
|
||||||
|
.where(eq(clientNotes.clientId, opts.loserId));
|
||||||
|
const loserTags = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientTags)
|
||||||
|
.where(eq(clientTags.clientId, opts.loserId));
|
||||||
|
const loserInterests = await tx
|
||||||
|
.select({ id: interests.id })
|
||||||
|
.from(interests)
|
||||||
|
.where(eq(interests.clientId, opts.loserId));
|
||||||
|
const loserReservations = await tx
|
||||||
|
.select({ id: berthReservations.id })
|
||||||
|
.from(berthReservations)
|
||||||
|
.where(eq(berthReservations.clientId, opts.loserId));
|
||||||
|
const loserRelationshipsAsA = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientRelationships)
|
||||||
|
.where(eq(clientRelationships.clientAId, opts.loserId));
|
||||||
|
const loserRelationshipsAsB = await tx
|
||||||
|
.select()
|
||||||
|
.from(clientRelationships)
|
||||||
|
.where(eq(clientRelationships.clientBId, opts.loserId));
|
||||||
|
|
||||||
|
const snapshot = {
|
||||||
|
loser: loserRow,
|
||||||
|
contacts: loserContacts,
|
||||||
|
addresses: loserAddresses,
|
||||||
|
notes: loserNotes,
|
||||||
|
tags: loserTags,
|
||||||
|
interests: loserInterests.map((r) => r.id),
|
||||||
|
reservations: loserReservations.map((r) => r.id),
|
||||||
|
relationshipsAsA: loserRelationshipsAsA,
|
||||||
|
relationshipsAsB: loserRelationshipsAsB,
|
||||||
|
fieldChoices: opts.fieldChoices ?? {},
|
||||||
|
mergedAt: new Date().toISOString(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// ── Apply field choices on the winner. We only touch fields the
|
||||||
|
// caller explicitly asked to copy from the loser; everything
|
||||||
|
// else stays as-is. ────────────────────────────────────────────────
|
||||||
|
const fieldUpdates: Partial<typeof winnerRow> = {};
|
||||||
|
if (opts.fieldChoices?.fullName === 'loser') fieldUpdates.fullName = loserRow.fullName;
|
||||||
|
if (opts.fieldChoices?.nationalityIso === 'loser')
|
||||||
|
fieldUpdates.nationalityIso = loserRow.nationalityIso;
|
||||||
|
if (opts.fieldChoices?.preferredContactMethod === 'loser')
|
||||||
|
fieldUpdates.preferredContactMethod = loserRow.preferredContactMethod;
|
||||||
|
if (opts.fieldChoices?.preferredLanguage === 'loser')
|
||||||
|
fieldUpdates.preferredLanguage = loserRow.preferredLanguage;
|
||||||
|
if (opts.fieldChoices?.timezone === 'loser') fieldUpdates.timezone = loserRow.timezone;
|
||||||
|
if (opts.fieldChoices?.source === 'loser') fieldUpdates.source = loserRow.source;
|
||||||
|
if (opts.fieldChoices?.sourceDetails === 'loser')
|
||||||
|
fieldUpdates.sourceDetails = loserRow.sourceDetails;
|
||||||
|
|
||||||
|
if (Object.keys(fieldUpdates).length > 0) {
|
||||||
|
await tx
|
||||||
|
.update(clients)
|
||||||
|
.set({ ...fieldUpdates, updatedAt: new Date() })
|
||||||
|
.where(eq(clients.id, opts.winnerId));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Reattach. Each table that points at the loser via clientId
|
||||||
|
// gets pointed at the winner instead. ─────────────────────────────
|
||||||
|
|
||||||
|
const movedInterests = (
|
||||||
|
await tx
|
||||||
|
.update(interests)
|
||||||
|
.set({ clientId: opts.winnerId, updatedAt: new Date() })
|
||||||
|
.where(eq(interests.clientId, opts.loserId))
|
||||||
|
.returning({ id: interests.id })
|
||||||
|
).length;
|
||||||
|
|
||||||
|
const movedReservations = (
|
||||||
|
await tx
|
||||||
|
.update(berthReservations)
|
||||||
|
.set({ clientId: opts.winnerId, updatedAt: new Date() })
|
||||||
|
.where(eq(berthReservations.clientId, opts.loserId))
|
||||||
|
.returning({ id: berthReservations.id })
|
||||||
|
).length;
|
||||||
|
|
||||||
|
// Contacts: move loser's contacts to winner, but DON'T duplicate any
|
||||||
|
// already-present (channel, value) pair. Loser-only ones get
|
||||||
|
// demoted to non-primary so the winner's primary stays intact.
|
||||||
|
const winnerContacts = await tx
|
||||||
|
.select({ channel: clientContacts.channel, value: clientContacts.value })
|
||||||
|
.from(clientContacts)
|
||||||
|
.where(eq(clientContacts.clientId, opts.winnerId));
|
||||||
|
const winnerContactKeys = new Set(
|
||||||
|
winnerContacts.map((c) => `${c.channel}::${c.value.toLowerCase()}`),
|
||||||
|
);
|
||||||
|
|
||||||
|
let movedContacts = 0;
|
||||||
|
for (const c of loserContacts) {
|
||||||
|
const key = `${c.channel}::${c.value.toLowerCase()}`;
|
||||||
|
if (winnerContactKeys.has(key)) {
|
||||||
|
// Winner already has this contact — drop loser's row (cascade
|
||||||
|
// will clean up when loser is archived). But we keep snapshot
|
||||||
|
// so undo restores it.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
await tx
|
||||||
|
.update(clientContacts)
|
||||||
|
.set({ clientId: opts.winnerId, isPrimary: false, updatedAt: new Date() })
|
||||||
|
.where(eq(clientContacts.id, c.id));
|
||||||
|
movedContacts += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Addresses: same shape as contacts, but uniqueness is harder to
|
||||||
|
// detect cleanly (free-text street). Just move them all and let the
|
||||||
|
// user dedupe in the UI later.
|
||||||
|
const movedAddresses = (
|
||||||
|
await tx
|
||||||
|
.update(clientAddresses)
|
||||||
|
.set({ clientId: opts.winnerId, isPrimary: false, updatedAt: new Date() })
|
||||||
|
.where(eq(clientAddresses.clientId, opts.loserId))
|
||||||
|
.returning({ id: clientAddresses.id })
|
||||||
|
).length;
|
||||||
|
|
||||||
|
const movedNotes = (
|
||||||
|
await tx
|
||||||
|
.update(clientNotes)
|
||||||
|
.set({ clientId: opts.winnerId, updatedAt: new Date() })
|
||||||
|
.where(eq(clientNotes.clientId, opts.loserId))
|
||||||
|
.returning({ id: clientNotes.id })
|
||||||
|
).length;
|
||||||
|
|
||||||
|
// Tags: copy any loser-only tag to the winner; drop overlap.
|
||||||
|
const winnerTags = await tx
|
||||||
|
.select({ tagId: clientTags.tagId })
|
||||||
|
.from(clientTags)
|
||||||
|
.where(eq(clientTags.clientId, opts.winnerId));
|
||||||
|
const winnerTagSet = new Set(winnerTags.map((t) => t.tagId));
|
||||||
|
let movedTags = 0;
|
||||||
|
for (const t of loserTags) {
|
||||||
|
if (!winnerTagSet.has(t.tagId)) {
|
||||||
|
await tx.insert(clientTags).values({ clientId: opts.winnerId, tagId: t.tagId });
|
||||||
|
movedTags += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await tx.delete(clientTags).where(eq(clientTags.clientId, opts.loserId));
|
||||||
|
|
||||||
|
// Relationships: rewrite each FK side to point at the winner. Keep
|
||||||
|
// both sides regardless — even if A and B both end up as the same
|
||||||
|
// person, the row is preserved for audit; the UI hides self-loops.
|
||||||
|
const movedRelationships =
|
||||||
|
(
|
||||||
|
await tx
|
||||||
|
.update(clientRelationships)
|
||||||
|
.set({ clientAId: opts.winnerId })
|
||||||
|
.where(eq(clientRelationships.clientAId, opts.loserId))
|
||||||
|
.returning({ id: clientRelationships.id })
|
||||||
|
).length +
|
||||||
|
(
|
||||||
|
await tx
|
||||||
|
.update(clientRelationships)
|
||||||
|
.set({ clientBId: opts.winnerId })
|
||||||
|
.where(eq(clientRelationships.clientBId, opts.loserId))
|
||||||
|
.returning({ id: clientRelationships.id })
|
||||||
|
).length;
|
||||||
|
|
||||||
|
// ── Archive the loser. Row stays in DB for the undo window;
|
||||||
|
// `mergedIntoClientId` is the redirect pointer for any stragglers
|
||||||
|
// (links / direct queries / saved views). ──────────────────────────
|
||||||
|
await tx
|
||||||
|
.update(clients)
|
||||||
|
.set({
|
||||||
|
archivedAt: new Date(),
|
||||||
|
mergedIntoClientId: opts.winnerId,
|
||||||
|
updatedAt: new Date(),
|
||||||
|
})
|
||||||
|
.where(eq(clients.id, opts.loserId));
|
||||||
|
|
||||||
|
// ── Mark any open merge candidate row for this pair as resolved. ───
|
||||||
|
await tx
|
||||||
|
.update(clientMergeCandidates)
|
||||||
|
.set({
|
||||||
|
status: 'merged',
|
||||||
|
resolvedAt: new Date(),
|
||||||
|
resolvedBy: opts.mergedBy,
|
||||||
|
})
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(clientMergeCandidates.portId, winnerRow.portId),
|
||||||
|
// pair stored in canonical order — match either direction
|
||||||
|
sql`(
|
||||||
|
(${clientMergeCandidates.clientAId} = ${opts.winnerId}
|
||||||
|
AND ${clientMergeCandidates.clientBId} = ${opts.loserId})
|
||||||
|
OR
|
||||||
|
(${clientMergeCandidates.clientAId} = ${opts.loserId}
|
||||||
|
AND ${clientMergeCandidates.clientBId} = ${opts.winnerId})
|
||||||
|
)`,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// ── Write the merge log + audit log. ────────────────────────────────
|
||||||
|
const [logRow] = await tx
|
||||||
|
.insert(clientMergeLog)
|
||||||
|
.values({
|
||||||
|
portId: winnerRow.portId,
|
||||||
|
survivingClientId: opts.winnerId,
|
||||||
|
mergedClientId: opts.loserId,
|
||||||
|
mergedBy: opts.mergedBy,
|
||||||
|
mergeDetails: snapshot,
|
||||||
|
})
|
||||||
|
.returning({ id: clientMergeLog.id });
|
||||||
|
|
||||||
|
await tx.insert(auditLogs).values({
|
||||||
|
portId: winnerRow.portId,
|
||||||
|
userId: opts.mergedBy,
|
||||||
|
entityType: 'client',
|
||||||
|
entityId: opts.winnerId,
|
||||||
|
action: 'merge',
|
||||||
|
newValue: {
|
||||||
|
loserId: opts.loserId,
|
||||||
|
loserName: loserRow.fullName,
|
||||||
|
movedInterests,
|
||||||
|
movedReservations,
|
||||||
|
movedContacts,
|
||||||
|
movedAddresses,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
mergeLogId: logRow!.id,
|
||||||
|
movedRows: {
|
||||||
|
interests: movedInterests,
|
||||||
|
contacts: movedContacts,
|
||||||
|
addresses: movedAddresses,
|
||||||
|
notes: movedNotes,
|
||||||
|
tags: movedTags,
|
||||||
|
relationships: movedRelationships,
|
||||||
|
reservations: movedReservations,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Convenience: list merge candidates for a port ──────────────────────────
|
||||||
|
|
||||||
|
export interface MergeCandidatePair {
|
||||||
|
id: string;
|
||||||
|
clientAId: string;
|
||||||
|
clientBId: string;
|
||||||
|
score: number;
|
||||||
|
reasons: string[];
|
||||||
|
status: string;
|
||||||
|
createdAt: Date;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fetch pending merge candidate pairs for the admin review queue. */
|
||||||
|
export async function listPendingMergeCandidates(portId: string): Promise<MergeCandidatePair[]> {
|
||||||
|
const rows = await db
|
||||||
|
.select()
|
||||||
|
.from(clientMergeCandidates)
|
||||||
|
.where(
|
||||||
|
and(eq(clientMergeCandidates.portId, portId), eq(clientMergeCandidates.status, 'pending')),
|
||||||
|
)
|
||||||
|
.orderBy(sql`${clientMergeCandidates.score} DESC`);
|
||||||
|
|
||||||
|
return rows.map((r) => ({
|
||||||
|
id: r.id,
|
||||||
|
clientAId: r.clientAId,
|
||||||
|
clientBId: r.clientBId,
|
||||||
|
score: r.score,
|
||||||
|
reasons: Array.isArray(r.reasons) ? (r.reasons as string[]) : [],
|
||||||
|
status: r.status,
|
||||||
|
createdAt: r.createdAt,
|
||||||
|
}));
|
||||||
|
}
|
||||||
183
tests/integration/dedup/client-merge.test.ts
Normal file
183
tests/integration/dedup/client-merge.test.ts
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
/**
|
||||||
|
* Client merge service — end-to-end integration test.
|
||||||
|
*
|
||||||
|
* Spins up two real clients in a real port via the factory helpers,
|
||||||
|
* attaches a few satellites (interest, contact, address, note),
|
||||||
|
* merges them, and asserts everything survived in the right place
|
||||||
|
* with the merge log written.
|
||||||
|
*/
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { eq } from 'drizzle-orm';
|
||||||
|
|
||||||
|
import { db } from '@/lib/db';
|
||||||
|
import { clients, clientContacts, clientNotes, clientMergeLog } from '@/lib/db/schema/clients';
|
||||||
|
import { interests } from '@/lib/db/schema/interests';
|
||||||
|
import { mergeClients } from '@/lib/services/client-merge.service';
|
||||||
|
import { makeClient, makePort, makeBerth } from '../../helpers/factories';
|
||||||
|
|
||||||
|
describe('mergeClients', () => {
|
||||||
|
it('moves interests and contacts from loser to winner; archives loser; writes merge log', async () => {
|
||||||
|
const port = await makePort();
|
||||||
|
const winner = await makeClient({
|
||||||
|
portId: port.id,
|
||||||
|
overrides: { fullName: 'Marcus Laurent' },
|
||||||
|
});
|
||||||
|
const loser = await makeClient({
|
||||||
|
portId: port.id,
|
||||||
|
overrides: { fullName: 'Marcus Laurent (dup)' },
|
||||||
|
});
|
||||||
|
|
||||||
|
// Attach contact + interest to loser
|
||||||
|
await db.insert(clientContacts).values({
|
||||||
|
clientId: loser.id,
|
||||||
|
channel: 'email',
|
||||||
|
value: 'marcus@example.com',
|
||||||
|
isPrimary: true,
|
||||||
|
});
|
||||||
|
await db.insert(clientNotes).values({
|
||||||
|
clientId: loser.id,
|
||||||
|
authorId: 'test-user',
|
||||||
|
content: 'Loser-side note',
|
||||||
|
});
|
||||||
|
const berth = await makeBerth({ portId: port.id });
|
||||||
|
await db.insert(interests).values({
|
||||||
|
portId: port.id,
|
||||||
|
clientId: loser.id,
|
||||||
|
berthId: berth.id,
|
||||||
|
pipelineStage: 'open',
|
||||||
|
leadCategory: 'general_interest',
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Merge ─────────────────────────────────────────────────────────────
|
||||||
|
const result = await mergeClients({
|
||||||
|
winnerId: winner.id,
|
||||||
|
loserId: loser.id,
|
||||||
|
mergedBy: 'test-user',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.movedRows.interests).toBe(1);
|
||||||
|
expect(result.movedRows.contacts).toBe(1);
|
||||||
|
expect(result.movedRows.notes).toBe(1);
|
||||||
|
|
||||||
|
// ── Loser should be archived with mergedIntoClientId set ──────────────
|
||||||
|
const [archivedLoser] = await db.select().from(clients).where(eq(clients.id, loser.id));
|
||||||
|
expect(archivedLoser?.archivedAt).not.toBeNull();
|
||||||
|
expect(archivedLoser?.mergedIntoClientId).toBe(winner.id);
|
||||||
|
|
||||||
|
// ── All loser-side rows now point at the winner ───────────────────────
|
||||||
|
const winnerInterests = await db
|
||||||
|
.select()
|
||||||
|
.from(interests)
|
||||||
|
.where(eq(interests.clientId, winner.id));
|
||||||
|
expect(winnerInterests).toHaveLength(1);
|
||||||
|
|
||||||
|
const winnerContacts = await db
|
||||||
|
.select()
|
||||||
|
.from(clientContacts)
|
||||||
|
.where(eq(clientContacts.clientId, winner.id));
|
||||||
|
expect(winnerContacts.find((c) => c.value === 'marcus@example.com')).toBeDefined();
|
||||||
|
|
||||||
|
const winnerNotes = await db
|
||||||
|
.select()
|
||||||
|
.from(clientNotes)
|
||||||
|
.where(eq(clientNotes.clientId, winner.id));
|
||||||
|
expect(winnerNotes.find((n) => n.content === 'Loser-side note')).toBeDefined();
|
||||||
|
|
||||||
|
// ── Merge log row exists with snapshot ────────────────────────────────
|
||||||
|
const [log] = await db
|
||||||
|
.select()
|
||||||
|
.from(clientMergeLog)
|
||||||
|
.where(eq(clientMergeLog.id, result.mergeLogId));
|
||||||
|
expect(log?.survivingClientId).toBe(winner.id);
|
||||||
|
expect(log?.mergedClientId).toBe(loser.id);
|
||||||
|
expect(log?.mergedBy).toBe('test-user');
|
||||||
|
expect(log?.mergeDetails).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('refuses to merge a client into itself', async () => {
|
||||||
|
const port = await makePort();
|
||||||
|
const c = await makeClient({ portId: port.id });
|
||||||
|
await expect(mergeClients({ winnerId: c.id, loserId: c.id, mergedBy: 'u' })).rejects.toThrow(
|
||||||
|
/itself/i,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('refuses to merge across different ports', async () => {
|
||||||
|
const portA = await makePort();
|
||||||
|
const portB = await makePort();
|
||||||
|
const a = await makeClient({ portId: portA.id });
|
||||||
|
const b = await makeClient({ portId: portB.id });
|
||||||
|
await expect(mergeClients({ winnerId: a.id, loserId: b.id, mergedBy: 'u' })).rejects.toThrow(
|
||||||
|
/different ports/i,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('refuses to merge a client that has already been merged', async () => {
|
||||||
|
const port = await makePort();
|
||||||
|
const winner = await makeClient({ portId: port.id });
|
||||||
|
const loser = await makeClient({ portId: port.id });
|
||||||
|
// First merge succeeds.
|
||||||
|
await mergeClients({ winnerId: winner.id, loserId: loser.id, mergedBy: 'u' });
|
||||||
|
// Second merge of the same loser should refuse.
|
||||||
|
const winner2 = await makeClient({ portId: port.id });
|
||||||
|
await expect(
|
||||||
|
mergeClients({ winnerId: winner2.id, loserId: loser.id, mergedBy: 'u' }),
|
||||||
|
).rejects.toThrow(/already merged/i);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('drops duplicate contact rows during reattach', async () => {
  const port = await makePort();
  const winner = await makeClient({ portId: port.id });
  const loser = await makeClient({ portId: port.id });

  // Both have the same email contact.
  // Seed the winner's copy first, then the loser's identical row, so the
  // reattach step has a genuine collision to resolve.
  await db.insert(clientContacts).values({
    clientId: winner.id,
    channel: 'email',
    value: 'same@example.com',
    isPrimary: true,
  });
  await db.insert(clientContacts).values({
    clientId: loser.id,
    channel: 'email',
    value: 'same@example.com',
    isPrimary: true,
  });

  const result = await mergeClients({
    winnerId: winner.id,
    loserId: loser.id,
    mergedBy: 'u',
  });

  // movedRows counts only rows actually reattached to the winner; the
  // loser's identical contact is discarded rather than moved.
  expect(result.movedRows.contacts).toBe(0); // duplicate dropped
  const winnerEmails = await db
    .select()
    .from(clientContacts)
    .where(eq(clientContacts.clientId, winner.id));
  // Winner kept exactly one copy of the shared email.
  expect(winnerEmails.filter((c) => c.value === 'same@example.com')).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('applies fieldChoices to copy loser values onto the winner', async () => {
|
||||||
|
const port = await makePort();
|
||||||
|
const winner = await makeClient({
|
||||||
|
portId: port.id,
|
||||||
|
overrides: { fullName: 'Marcus L.' },
|
||||||
|
});
|
||||||
|
const loser = await makeClient({
|
||||||
|
portId: port.id,
|
||||||
|
overrides: { fullName: 'Marcus Laurent' },
|
||||||
|
});
|
||||||
|
|
||||||
|
await mergeClients({
|
||||||
|
winnerId: winner.id,
|
||||||
|
loserId: loser.id,
|
||||||
|
mergedBy: 'u',
|
||||||
|
fieldChoices: { fullName: 'loser' },
|
||||||
|
});
|
||||||
|
|
||||||
|
const [updatedWinner] = await db.select().from(clients).where(eq(clients.id, winner.id));
|
||||||
|
expect(updatedWinner?.fullName).toBe('Marcus Laurent');
|
||||||
|
});
|
||||||
|
});
|
||||||
157
tests/integration/dedup/match-candidates-api.test.ts
Normal file
157
tests/integration/dedup/match-candidates-api.test.ts
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
/**
|
||||||
|
* Match-candidates API — integration test.
|
||||||
|
*
|
||||||
|
* Exercises the GET /api/v1/clients/match-candidates handler against a
|
||||||
|
* real port + clients pool. Verifies the dedup library's at-create
|
||||||
|
* suggestion path returns the right candidates and confidence tiers
|
||||||
|
* for the "use existing client?" form interruption.
|
||||||
|
*/
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
|
||||||
|
import { db } from '@/lib/db';
|
||||||
|
import { clientContacts } from '@/lib/db/schema/clients';
|
||||||
|
import { getMatchCandidatesHandler } from '@/app/api/v1/clients/match-candidates/handlers';
|
||||||
|
import { makeMockCtx, makeMockRequest } from '../../helpers/route-tester';
|
||||||
|
import { makeClient, makePort } from '../../helpers/factories';
|
||||||
|
|
||||||
|
// Shape of one entry in the handler's `data` array. Declared locally
// (rather than imported) so the test fails loudly if the API response
// contract drifts from what the UI expects.
interface MatchData {
  // Id of the existing client suggested as a possible duplicate.
  clientId: string;
  fullName: string;
  // Raw match score; `confidence` is the tier derived from it.
  score: number;
  confidence: 'high' | 'medium' | 'low';
  // Human-readable explanations, e.g. 'email match', 'phone match'.
  reasons: string[];
  interestCount: number;
}
|
||||||
|
|
||||||
|
async function callHandler(
|
||||||
|
ctx: ReturnType<typeof makeMockCtx>,
|
||||||
|
query: Record<string, string>,
|
||||||
|
): Promise<MatchData[]> {
|
||||||
|
const url = new URL('http://localhost/api/v1/clients/match-candidates');
|
||||||
|
for (const [k, v] of Object.entries(query)) url.searchParams.set(k, v);
|
||||||
|
const req = makeMockRequest('GET', url.toString());
|
||||||
|
const res = await getMatchCandidatesHandler(req, ctx);
|
||||||
|
expect(res.status).toBe(200);
|
||||||
|
const body = await res.json();
|
||||||
|
return body.data as MatchData[];
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('GET /api/v1/clients/match-candidates', () => {
  it('returns empty when nothing actionable was provided', async () => {
    // No email / phone / name in the query — nothing to match on.
    const port = await makePort();
    const ctx = makeMockCtx({ portId: port.id });
    const data = await callHandler(ctx, {});
    expect(data).toEqual([]);
  });

  it('finds an existing client by exact email match (high confidence)', async () => {
    const port = await makePort();
    const ctx = makeMockCtx({ portId: port.id });
    const existing = await makeClient({
      portId: port.id,
      overrides: { fullName: 'Marcus Laurent' },
    });
    await db.insert(clientContacts).values({
      clientId: existing.id,
      channel: 'email',
      value: 'marcus@example.com',
      isPrimary: true,
    });
    await db.insert(clientContacts).values({
      clientId: existing.id,
      channel: 'phone',
      value: '+15551234567',
      valueE164: '+15551234567',
      isPrimary: true,
    });

    // Query email is deliberately mixed-case ('Marcus@…' vs the stored
    // lowercase value) — a match here proves lookup is case-insensitive.
    const data = await callHandler(ctx, {
      email: 'Marcus@example.com',
      phone: '+15551234567',
      name: 'Marcus Laurent',
    });

    expect(data).toHaveLength(1);
    expect(data[0]!.clientId).toBe(existing.id);
    expect(data[0]!.confidence).toBe('high');
    expect(data[0]!.reasons).toEqual(expect.arrayContaining(['email match', 'phone match']));
  });

  it('does not surface unrelated clients in the same port', async () => {
    const port = await makePort();
    const ctx = makeMockCtx({ portId: port.id });
    const target = await makeClient({
      portId: port.id,
      overrides: { fullName: 'Marcus Laurent' },
    });
    await db.insert(clientContacts).values({
      clientId: target.id,
      channel: 'email',
      value: 'marcus@example.com',
      isPrimary: true,
    });
    // An unrelated client.
    const unrelated = await makeClient({
      portId: port.id,
      overrides: { fullName: 'Bob Smith' },
    });
    await db.insert(clientContacts).values({
      clientId: unrelated.id,
      channel: 'email',
      value: 'bob@example.org',
      isPrimary: true,
    });

    // Only the client owning the queried email should come back.
    const data = await callHandler(ctx, { email: 'marcus@example.com' });
    expect(data.map((d) => d.clientId)).toEqual([target.id]);
  });

  it('returns medium-confidence partial matches', async () => {
    // Same name, different contact info — Pattern F territory.
    const port = await makePort();
    const ctx = makeMockCtx({ portId: port.id });
    const existing = await makeClient({
      portId: port.id,
      overrides: { fullName: 'Etiennette Clamouze' },
    });
    await db.insert(clientContacts).values({
      clientId: existing.id,
      channel: 'email',
      value: 'clamouze.etiennette@gmail.com',
      isPrimary: true,
    });

    const data = await callHandler(ctx, {
      // Different email + phone, same name.
      email: 'etiennette@the-manoah.com',
      name: 'Etiennette Clamouze',
    });

    // Either no match (low confidence filtered out) or a medium one —
    // either is fine. Critically, NOT high.
    if (data.length > 0) {
      expect(data[0]!.confidence).not.toBe('high');
    }
  });

  it('does not leak across ports', async () => {
    const portA = await makePort();
    const portB = await makePort();

    const ctxA = makeMockCtx({ portId: portA.id });
    const inB = await makeClient({
      portId: portB.id,
      overrides: { fullName: 'In Port B' },
    });
    await db.insert(clientContacts).values({
      clientId: inB.id,
      channel: 'email',
      value: 'b@example.com',
      isPrimary: true,
    });

    // Caller is in port A, asking for an email that lives in port B.
    const data = await callHandler(ctxA, { email: 'b@example.com' });
    expect(data).toEqual([]);
  });
});
|
||||||
379
tests/unit/dedup/find-matches.test.ts
Normal file
379
tests/unit/dedup/find-matches.test.ts
Normal file
@@ -0,0 +1,379 @@
|
|||||||
|
/**
|
||||||
|
* Match-finding library — unit tests.
|
||||||
|
*
|
||||||
|
* Each duplicate cluster from the legacy NocoDB Interests audit (see
|
||||||
|
* docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §1.2)
|
||||||
|
* is encoded as a fixture here. The expected scoring tier (high / medium
|
||||||
|
* / low) is the design contract — if the algorithm starts returning
|
||||||
|
* "high" for a Pattern F case (Etiennette / Bruno+Bruce) it has lost
|
||||||
|
* the false-positive guard and we'll know immediately.
|
||||||
|
*/
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
|
||||||
|
import { findClientMatches, type MatchCandidate } from '@/lib/dedup/find-matches';
|
||||||
|
|
||||||
|
// Sensible defaults for tests — match the design's recommended thresholds.
|
||||||
|
const THRESHOLDS = {
|
||||||
|
highScore: 90,
|
||||||
|
mediumScore: 50,
|
||||||
|
};
|
||||||
|
|
||||||
|
function candidate(partial: Partial<MatchCandidate> & { id: string }): MatchCandidate {
|
||||||
|
return {
|
||||||
|
id: partial.id,
|
||||||
|
fullName: partial.fullName ?? null,
|
||||||
|
surnameToken: partial.surnameToken ?? null,
|
||||||
|
emails: partial.emails ?? [],
|
||||||
|
phonesE164: partial.phonesE164 ?? [],
|
||||||
|
countryIso: partial.countryIso ?? null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('findClientMatches', () => {
|
||||||
|
describe('Pattern A — pure double-submit (high confidence)', () => {
|
||||||
|
it('flags identical email + phone as high', () => {
|
||||||
|
// From real data: Deepak Ramchandani #624/#625, identical fields.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Deepak Ramchandani',
|
||||||
|
surnameToken: 'ramchandani',
|
||||||
|
emails: ['dannyrams8888@gmail.com'],
|
||||||
|
phonesE164: ['+17215868888'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Deepak Ramchandani',
|
||||||
|
surnameToken: 'ramchandani',
|
||||||
|
emails: ['dannyrams8888@gmail.com'],
|
||||||
|
phonesE164: ['+17215868888'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches).toHaveLength(1);
|
||||||
|
expect(matches[0]!.candidate.id).toBe('a');
|
||||||
|
expect(matches[0]!.score).toBeGreaterThanOrEqual(90);
|
||||||
|
expect(matches[0]!.confidence).toBe('high');
|
||||||
|
expect(matches[0]!.reasons).toEqual(expect.arrayContaining(['email match', 'phone match']));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Pattern B — same email, different phone format (high)', () => {
|
||||||
|
it('high confidence when phones already normalize-equal', () => {
|
||||||
|
// From real data: Howard Wiarda #236/#536, "574-274-0548" vs "+15742740548".
|
||||||
|
// After normalization both phones are the same E.164, so the rule fires.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Howard Wiarda',
|
||||||
|
surnameToken: 'wiarda',
|
||||||
|
emails: ['hwiarda@hotmail.com'],
|
||||||
|
phonesE164: ['+15742740548'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Howard Wiarda',
|
||||||
|
surnameToken: 'wiarda',
|
||||||
|
emails: ['hwiarda@hotmail.com'],
|
||||||
|
phonesE164: ['+15742740548'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches[0]!.confidence).toBe('high');
|
||||||
|
expect(matches[0]!.score).toBeGreaterThanOrEqual(90);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Pattern C — name capitalization variant (high)', () => {
|
||||||
|
it('treats lowercase + uppercase as the same person when surname-token + email + phone all match', () => {
|
||||||
|
// From real data: Nicolas Ruiz #681/#682/#683, email differs only by case.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Nicolas Ruiz',
|
||||||
|
surnameToken: 'ruiz',
|
||||||
|
emails: ['ruiz.nicolas@ufl.edu'],
|
||||||
|
phonesE164: ['+17862006617'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Nicolas Ruiz',
|
||||||
|
surnameToken: 'ruiz',
|
||||||
|
emails: ['ruiz.nicolas@ufl.edu'],
|
||||||
|
phonesE164: ['+17862006617'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches[0]!.confidence).toBe('high');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Pattern D — name shortening (high)', () => {
|
||||||
|
it('Chris vs Christopher with same email + phone scores high', () => {
|
||||||
|
// From real data: Chris Allen #700 vs Christopher Allen #534.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Chris Allen',
|
||||||
|
surnameToken: 'allen',
|
||||||
|
emails: ['chris@thundercatsports.com'],
|
||||||
|
phonesE164: ['+17814548950'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Christopher Allen',
|
||||||
|
surnameToken: 'allen',
|
||||||
|
emails: ['chris@thundercatsports.com'],
|
||||||
|
phonesE164: ['+17814548950'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches[0]!.confidence).toBe('high');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Pattern E — typo on resubmit', () => {
|
||||||
|
it('same email + nearly-identical phone (typo in last digits) scores high', () => {
|
||||||
|
// Christopher Camazou #649/#650 — phone differs in last 4 digits but
|
||||||
|
// everything else matches. Exact phone equality fails; email exact
|
||||||
|
// match alone (60) + name-token match (20) puts us in medium tier.
|
||||||
|
// The user can confirm the merge.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Christopher Camazou',
|
||||||
|
surnameToken: 'camazou',
|
||||||
|
emails: ['camazou11@gmail.com'],
|
||||||
|
phonesE164: ['+33608334455'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Christopher Camazou',
|
||||||
|
surnameToken: 'camazou',
|
||||||
|
emails: ['camazou11@gmail.com'],
|
||||||
|
phonesE164: ['+33608336549'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches).toHaveLength(1);
|
||||||
|
// Email + name match without phone match — strong but not certain.
|
||||||
|
expect(matches[0]!.confidence).toMatch(/^(high|medium)$/);
|
||||||
|
expect(matches[0]!.score).toBeGreaterThanOrEqual(70);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('Constanzo / Costanzo surname typo with same email + phone scores high', () => {
|
||||||
|
// Gianfranco Di Constanzo #585 vs Di Costanzo #336 — same email + phone
|
||||||
|
// and only a 1-letter surname typo. This is a strong "same client,
|
||||||
|
// multiple yachts" signal — the design's signature win.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Gianfranco Di Constanzo',
|
||||||
|
surnameToken: 'constanzo',
|
||||||
|
emails: ['gdc@nauticall.com'],
|
||||||
|
phonesE164: ['+17542628669'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Gianfranco Di Costanzo',
|
||||||
|
surnameToken: 'costanzo',
|
||||||
|
emails: ['gdc@nauticall.com'],
|
||||||
|
phonesE164: ['+17542628669'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches[0]!.confidence).toBe('high');
|
||||||
|
expect(matches[0]!.score).toBeGreaterThanOrEqual(90);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Pattern F — hard cases (must NOT auto-merge)', () => {
|
||||||
|
it('same name with different country phone + different email scores at most medium', () => {
|
||||||
|
// Etiennette Clamouze #188/#717 — same name but completely different
|
||||||
|
// email + phone (and the phones are in different country codes,
|
||||||
|
// suggesting either a relative, a coworker, or a name-collision).
|
||||||
|
// We must NOT classify this as "high" or it would force-merge two
|
||||||
|
// distinct people.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Etiennette Clamouze',
|
||||||
|
surnameToken: 'clamouze',
|
||||||
|
emails: ['etiennette@the-manoah.com'],
|
||||||
|
phonesE164: ['+12645815607'],
|
||||||
|
countryIso: 'AI',
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Etiennette Clamouze',
|
||||||
|
surnameToken: 'clamouze',
|
||||||
|
emails: ['clamouze.etiennette@gmail.com'],
|
||||||
|
phonesE164: ['+33767780640'],
|
||||||
|
countryIso: 'FR',
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
// Surname-token + name-exact match should score in medium tier so
|
||||||
|
// the pair lands in the review queue but doesn't auto-merge.
|
||||||
|
if (matches.length > 0) {
|
||||||
|
expect(matches[0]!.confidence).not.toBe('high');
|
||||||
|
expect(matches[0]!.score).toBeLessThan(90);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('shared email between two clearly different names is medium not high', () => {
|
||||||
|
// Bruno Joyerot #18 vs Bruce Hearn #19 — Bruno's row shows email
|
||||||
|
// belonging to "catherine elaine hearn" (Bruce's spouse). Same
|
||||||
|
// household phone area code. Name overlap is partial. Don't merge.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Bruce Hearn',
|
||||||
|
surnameToken: 'hearn',
|
||||||
|
emails: ['bhearn1063@gmail.com'],
|
||||||
|
phonesE164: ['+12642358840'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Bruno Joyerot',
|
||||||
|
surnameToken: 'joyerot',
|
||||||
|
emails: ['catherineelainehearn@gmail.com'],
|
||||||
|
phonesE164: ['+12642352816'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
// Names don't match, emails don't match, phones differ — there's
|
||||||
|
// no reason for this to surface at all. Either no match or low.
|
||||||
|
if (matches.length > 0) {
|
||||||
|
expect(matches[0]!.confidence).toBe('low');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Negative evidence — same email but different country phone', () => {
|
||||||
|
it('reduces score when email matches but phone country differs', () => {
|
||||||
|
// Constructed: same email, but one phone is +33 (FR) and the other
|
||||||
|
// is +1 (US). Likely a shared-inbox spouse situation. We want
|
||||||
|
// medium tier so it lands in review, not high tier.
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'b',
|
||||||
|
fullName: 'Test User',
|
||||||
|
surnameToken: 'user',
|
||||||
|
emails: ['shared@example.com'],
|
||||||
|
phonesE164: ['+15551234567'],
|
||||||
|
countryIso: 'US',
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Test User',
|
||||||
|
surnameToken: 'user',
|
||||||
|
emails: ['shared@example.com'],
|
||||||
|
phonesE164: ['+33611111111'],
|
||||||
|
countryIso: 'FR',
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
// Email match alone would be 60 + name token match 20 = 80 (medium).
|
||||||
|
// Negative evidence (different phone country) brings it down further.
|
||||||
|
expect(matches[0]!.confidence).toBe('medium');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Blocking — only relevant candidates are scored', () => {
|
||||||
|
it('does not score candidates with no shared emails / phones / surname token', () => {
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'newbie',
|
||||||
|
fullName: 'Alice Smith',
|
||||||
|
surnameToken: 'smith',
|
||||||
|
emails: ['alice@example.com'],
|
||||||
|
phonesE164: ['+15551234567'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
id: 'unrelated1',
|
||||||
|
fullName: 'Bob Jones',
|
||||||
|
surnameToken: 'jones',
|
||||||
|
emails: ['bob@example.org'],
|
||||||
|
phonesE164: ['+33611111111'],
|
||||||
|
}),
|
||||||
|
candidate({
|
||||||
|
id: 'unrelated2',
|
||||||
|
fullName: 'Carol White',
|
||||||
|
surnameToken: 'white',
|
||||||
|
emails: ['carol@example.net'],
|
||||||
|
phonesE164: ['+447700900111'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches).toHaveLength(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Empty pool', () => {
|
||||||
|
it('returns no matches when the pool is empty', () => {
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'a',
|
||||||
|
fullName: 'Alice',
|
||||||
|
emails: ['alice@example.com'],
|
||||||
|
});
|
||||||
|
expect(findClientMatches(incoming, [], THRESHOLDS)).toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Sort order', () => {
|
||||||
|
it('returns matches sorted by score descending', () => {
|
||||||
|
const incoming = candidate({
|
||||||
|
id: 'incoming',
|
||||||
|
fullName: 'John Smith',
|
||||||
|
surnameToken: 'smith',
|
||||||
|
emails: ['john@example.com'],
|
||||||
|
phonesE164: ['+15551234567'],
|
||||||
|
});
|
||||||
|
const pool = [
|
||||||
|
candidate({
|
||||||
|
// High match — same email + phone
|
||||||
|
id: 'high-match',
|
||||||
|
fullName: 'John Smith',
|
||||||
|
surnameToken: 'smith',
|
||||||
|
emails: ['john@example.com'],
|
||||||
|
phonesE164: ['+15551234567'],
|
||||||
|
}),
|
||||||
|
candidate({
|
||||||
|
// Medium match — same email only
|
||||||
|
id: 'medium-match',
|
||||||
|
fullName: 'Different Person',
|
||||||
|
surnameToken: 'person',
|
||||||
|
emails: ['john@example.com'],
|
||||||
|
phonesE164: ['+33611111111'],
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
const matches = findClientMatches(incoming, pool, THRESHOLDS);
|
||||||
|
|
||||||
|
expect(matches.length).toBeGreaterThanOrEqual(2);
|
||||||
|
expect(matches[0]!.candidate.id).toBe('high-match');
|
||||||
|
expect(matches[0]!.score).toBeGreaterThan(matches[1]!.score);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
213
tests/unit/dedup/migration-transform.test.ts
Normal file
213
tests/unit/dedup/migration-transform.test.ts
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
/**
|
||||||
|
* Migration transform — fixture-based regression test.
|
||||||
|
*
|
||||||
|
* Feeds the transform a small frozen NocoDB snapshot containing one
|
||||||
|
* representative row from each duplicate pattern documented in
|
||||||
|
* docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §1.2,
|
||||||
|
* and asserts the resulting plan matches the algorithm's expected
|
||||||
|
* behavior. If any future change starts merging Pattern F (Etiennette
|
||||||
|
* Clamouze) or stops merging Pattern A (Deepak Ramchandani), this
|
||||||
|
* test fails immediately.
|
||||||
|
*/
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
|
||||||
|
import { transformSnapshot } from '@/lib/dedup/migration-transform';
|
||||||
|
import type { NocoDbRow, NocoDbSnapshot } from '@/lib/dedup/nocodb-source';
|
||||||
|
|
||||||
|
function row(fields: Partial<NocoDbRow> & { Id: number }): NocoDbRow {
|
||||||
|
return fields as NocoDbRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
const FIXTURE: NocoDbSnapshot = {
|
||||||
|
fetchedAt: '2026-05-03T12:00:00.000Z',
|
||||||
|
berths: [],
|
||||||
|
residentialInterests: [],
|
||||||
|
websiteInterestSubmissions: [],
|
||||||
|
websiteContactFormSubmissions: [],
|
||||||
|
websiteBerthEoiSupplements: [],
|
||||||
|
interests: [
|
||||||
|
// Pattern A: pure double-submit (Deepak Ramchandani #624/#625)
|
||||||
|
row({
|
||||||
|
Id: 624,
|
||||||
|
'Full Name': 'Deepak Ramchandani',
|
||||||
|
'Email Address': 'dannyrams8888@gmail.com',
|
||||||
|
'Phone Number': '+17215868888',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 625,
|
||||||
|
'Full Name': 'Deepak Ramchandani',
|
||||||
|
'Email Address': 'dannyrams8888@gmail.com',
|
||||||
|
'Phone Number': '+17215868888',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Pattern B: phone format variance (Howard Wiarda #236/#536)
|
||||||
|
row({
|
||||||
|
Id: 236,
|
||||||
|
'Full Name': 'Howard Wiarda',
|
||||||
|
'Email Address': 'hwiarda@hotmail.com',
|
||||||
|
'Phone Number': '574-274-0548',
|
||||||
|
'Place of Residence': 'USA',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 536,
|
||||||
|
'Full Name': 'Howard Wiarda',
|
||||||
|
'Email Address': 'hwiarda@hotmail.com',
|
||||||
|
'Phone Number': '+15742740548',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Pattern C: name capitalization (Nicolas Ruiz #681/#682/#683 — three rows)
|
||||||
|
row({
|
||||||
|
Id: 681,
|
||||||
|
'Full Name': 'Nicolas Ruiz',
|
||||||
|
'Email Address': 'ruiz.nicolas@ufl.edu',
|
||||||
|
'Phone Number': '+17862006617',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 682,
|
||||||
|
'Full Name': 'Nicolas Ruiz',
|
||||||
|
'Email Address': 'ruiz.nicolas@ufl.edu',
|
||||||
|
'Phone Number': '+17862006617',
|
||||||
|
'Sales Process Level': 'Specific Qualified Interest',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 683,
|
||||||
|
'Full Name': 'Nicolas Ruiz',
|
||||||
|
'Email Address': 'Ruiz.Nicolas@ufl.edu',
|
||||||
|
'Phone Number': '+17862006617',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Pattern E: surname typo with same email + phone (Constanzo/Costanzo)
|
||||||
|
row({
|
||||||
|
Id: 336,
|
||||||
|
'Full Name': 'Gianfranco Di Costanzo',
|
||||||
|
'Email Address': 'gdc@nauticall.com',
|
||||||
|
'Phone Number': '+17542628669',
|
||||||
|
'Yacht Name': 'GEMINI',
|
||||||
|
'Sales Process Level': 'Contract Signed',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 585,
|
||||||
|
'Full Name': 'Gianfranco Di Constanzo',
|
||||||
|
'Email Address': 'gdc@nauticall.com',
|
||||||
|
'Phone Number': '+17542628669',
|
||||||
|
'Yacht Name': 'CALYPSO',
|
||||||
|
'Sales Process Level': 'Signed EOI and NDA',
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Pattern F: same name, different country phones (Etiennette Clamouze)
|
||||||
|
row({
|
||||||
|
Id: 188,
|
||||||
|
'Full Name': 'Etiennette Clamouze',
|
||||||
|
'Email Address': 'clamouze.etiennette@gmail.com',
|
||||||
|
'Phone Number': '+33767780640',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
row({
|
||||||
|
Id: 717,
|
||||||
|
'Full Name': 'Etiennette Clamouze',
|
||||||
|
'Email Address': 'Etiennette@the-manoah.com',
|
||||||
|
'Phone Number': '+12645815607',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Single isolated row to verify non-duplicates pass through
|
||||||
|
row({
|
||||||
|
Id: 999,
|
||||||
|
'Full Name': 'Lone Wolf',
|
||||||
|
'Email Address': 'lone@example.com',
|
||||||
|
'Phone Number': '+15551234567',
|
||||||
|
'Sales Process Level': 'General Qualified Interest',
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe('transformSnapshot — fixture regression', () => {
  it('produces the expected number of clients + interests', () => {
    const plan = transformSnapshot(FIXTURE);

    // 12 input rows → 7 unique clients (Deepak: 1, Wiarda: 1, Ruiz: 1,
    // Constanzo: 1, Etiennette x2: 2, Lone: 1). Etiennette stays as 2
    // because Pattern F is correctly NOT auto-merged.
    expect(plan.stats.outputClients).toBe(7);
    expect(plan.stats.outputInterests).toBe(12); // one per source row
  });

  it('auto-links every Pattern A–E cluster', () => {
    const plan = transformSnapshot(FIXTURE);
    // Collect every legacy row id that participates in any auto-link,
    // whether as the cluster lead or as a merged follower.
    const linkedSourceIds = new Set<number>();
    for (const link of plan.autoLinks) {
      linkedSourceIds.add(link.leadSourceId);
      for (const merged of link.mergedSourceIds) {
        linkedSourceIds.add(merged);
      }
    }

    // Pattern A: 624 + 625
    expect(linkedSourceIds.has(624) && linkedSourceIds.has(625)).toBe(true);
    // Pattern B: 236 + 536
    expect(linkedSourceIds.has(236) && linkedSourceIds.has(536)).toBe(true);
    // Pattern C: 681 + 682 + 683 (three-way)
    expect(linkedSourceIds.has(681) && linkedSourceIds.has(682) && linkedSourceIds.has(683)).toBe(
      true,
    );
    // Pattern E: 336 + 585
    expect(linkedSourceIds.has(336) && linkedSourceIds.has(585)).toBe(true);
  });

  it('does NOT auto-link Pattern F (Etiennette Clamouze, different country)', () => {
    const plan = transformSnapshot(FIXTURE);
    const linkedSourceIds = new Set<number>();
    for (const link of plan.autoLinks) {
      linkedSourceIds.add(link.leadSourceId);
      for (const merged of link.mergedSourceIds) {
        linkedSourceIds.add(merged);
      }
    }
    // Both Etiennette rows must remain as separate clients.
    expect(linkedSourceIds.has(188)).toBe(false);
    expect(linkedSourceIds.has(717)).toBe(false);
  });

  it('preserves every interest as its own row even when clients merge', () => {
    const plan = transformSnapshot(FIXTURE);
    // Dedup merges CLIENTS, never interests — every legacy row survives.
    const sourceIds = plan.interests.map((i) => i.sourceId).sort((a, b) => a - b);
    expect(sourceIds).toEqual([188, 236, 336, 536, 585, 624, 625, 681, 682, 683, 717, 999]);
  });

  it('maps the legacy 8-stage enum to new pipeline stages', () => {
    const plan = transformSnapshot(FIXTURE);
    const stagesById = new Map(plan.interests.map((i) => [i.sourceId, i.pipelineStage]));
    expect(stagesById.get(681)).toBe('open'); // General Qualified Interest
    expect(stagesById.get(682)).toBe('details_sent'); // Specific Qualified Interest
    expect(stagesById.get(336)).toBe('contract_signed'); // Contract Signed
    expect(stagesById.get(585)).toBe('eoi_signed'); // Signed EOI and NDA
  });

  it('attaches different yachts to one merged Constanzo client', () => {
    const plan = transformSnapshot(FIXTURE);
    // The typo'd duplicate (#585) merges into one client carrying both
    // source ids; its two interests keep their distinct yacht names.
    const constanzoClient = plan.clients.find(
      (c) => c.sourceIds.includes(336) && c.sourceIds.includes(585),
    );
    expect(constanzoClient).toBeDefined();
    const yachtsForConstanzo = plan.interests
      .filter((i) => i.clientTempId === constanzoClient!.tempId)
      .map((i) => i.yachtName)
      .sort();
    expect(yachtsForConstanzo).toEqual(['CALYPSO', 'GEMINI']);
  });

  it('produces deterministic output (same input → same plan)', () => {
    // The transform is pure — running it twice should yield bit-identical
    // results. Catches order-dependent bugs in the dedup clustering.
    const a = transformSnapshot(FIXTURE);
    const b = transformSnapshot(FIXTURE);
    expect(JSON.stringify(a.stats)).toBe(JSON.stringify(b.stats));
    expect(a.autoLinks.length).toBe(b.autoLinks.length);
  });
});
|
||||||
270
tests/unit/dedup/normalize.test.ts
Normal file
270
tests/unit/dedup/normalize.test.ts
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
/**
|
||||||
|
* Normalization library — unit tests.
|
||||||
|
*
|
||||||
|
* Every fixture here comes from real dirty values observed in the legacy
|
||||||
|
* NocoDB Interests table during the 2026-05-03 audit (see
|
||||||
|
* docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §1.3).
|
||||||
|
* The point is regression-prevention: if any of these patterns ever
|
||||||
|
* stops normalizing the way it should, dedup quality silently drops.
|
||||||
|
*/
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
|
||||||
|
import {
|
||||||
|
normalizeName,
|
||||||
|
normalizeEmail,
|
||||||
|
normalizePhone,
|
||||||
|
resolveCountry,
|
||||||
|
} from '@/lib/dedup/normalize';
|
||||||
|
|
||||||
|
describe('normalizeName', () => {
|
||||||
|
it('returns null fields for empty / null input', () => {
|
||||||
|
expect(normalizeName('')).toEqual({ display: '', normalized: '', surnameToken: undefined });
|
||||||
|
expect(normalizeName(' ')).toEqual({
|
||||||
|
display: '',
|
||||||
|
normalized: '',
|
||||||
|
surnameToken: undefined,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('trims leading/trailing whitespace', () => {
|
||||||
|
expect(normalizeName(' Marcus Laurent ')).toMatchObject({
|
||||||
|
display: 'Marcus Laurent',
|
||||||
|
normalized: 'marcus laurent',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('collapses repeated internal whitespace to a single space', () => {
|
||||||
|
// From real data: "Arthur Matthews" (#183), "Corinne Roche" (#208).
|
||||||
|
expect(normalizeName('Arthur Matthews').display).toBe('Arthur Matthews');
|
||||||
|
expect(normalizeName('Corinne Roche').display).toBe('Corinne Roche');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('replaces embedded carriage returns and newlines with single spaces', () => {
|
||||||
|
// From real data: "Andrei \nVAGNANOV" (#178), "Daniel\r PRZEDBORSKI" (#175).
|
||||||
|
expect(normalizeName('Andrei \nVAGNANOV').display).toBe('Andrei Vagnanov');
|
||||||
|
expect(normalizeName('Daniel\r PRZEDBORSKI').display).toBe('Daniel Przedborski');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('title-cases ALL-CAPS surnames while keeping given name title-cased', () => {
|
||||||
|
// From real data: "Jona ANDERSEN" (#232), "Duane SALTSGAVER" (#227),
|
||||||
|
// "Marcos DALLA PRIA" (#165).
|
||||||
|
expect(normalizeName('Jona ANDERSEN').display).toBe('Jona Andersen');
|
||||||
|
expect(normalizeName('Duane SALTSGAVER').display).toBe('Duane Saltsgaver');
|
||||||
|
// Particle 'dalla' stays lowercase mid-name.
|
||||||
|
expect(normalizeName('Marcos DALLA PRIA').display).toBe('Marcos dalla Pria');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('title-cases lowercased entries', () => {
|
||||||
|
// From real data: "antony amaral" (#665), "david rosenbloom" (#239),
|
||||||
|
// "john Tickner" (#247).
|
||||||
|
expect(normalizeName('antony amaral').display).toBe('Antony Amaral');
|
||||||
|
expect(normalizeName('david rosenbloom').display).toBe('David Rosenbloom');
|
||||||
|
expect(normalizeName('john Tickner').display).toBe('John Tickner');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps Romance and Germanic particles lowercase mid-name', () => {
|
||||||
|
// From real data: "Olav van Velsen" (#526), "Bruno Joyerot" (#18),
|
||||||
|
// "OLIVIER DAIN" (#677). Also synthetic "Carla de la Cruz".
|
||||||
|
expect(normalizeName('Olav van Velsen').display).toBe('Olav van Velsen');
|
||||||
|
expect(normalizeName('Carla de la Cruz').display).toBe('Carla de la Cruz');
|
||||||
|
expect(normalizeName('OLIVIER DAIN').display).toBe('Olivier Dain');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves O‘-prefixed Irish surnames as title-case', () => {
|
||||||
|
expect(normalizeName("liam o'brien").display).toBe("Liam O'Brien");
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps the slash-with-company structure intact', () => {
|
||||||
|
// From real data: "Daniel Wainstein / 7 Knots, LLC" (#637),
|
||||||
|
// "Bruno Joyerot / SAS TIKI" (#18).
|
||||||
|
expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').display).toBe(
|
||||||
|
'Daniel Wainstein / 7 Knots, LLC',
|
||||||
|
);
|
||||||
|
expect(normalizeName('Bruno Joyerot / SAS TIKI').display).toBe('Bruno Joyerot / SAS TIKI');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('exposes the last non-particle token as surnameToken (lowercase) for blocking', () => {
|
||||||
|
expect(normalizeName('Marcus Laurent').surnameToken).toBe('laurent');
|
||||||
|
expect(normalizeName('Olav van Velsen').surnameToken).toBe('velsen');
|
||||||
|
expect(normalizeName('Carla de la Cruz').surnameToken).toBe('cruz');
|
||||||
|
expect(normalizeName("Liam O'Brien").surnameToken).toBe("o'brien");
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles single-token names — surnameToken is the only token', () => {
|
||||||
|
expect(normalizeName('Madonna').surnameToken).toBe('madonna');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces a normalized form that is always lowercase', () => {
|
||||||
|
expect(normalizeName('Andrei VAGNANOV').normalized).toBe('andrei vagnanov');
|
||||||
|
expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').normalized).toBe(
|
||||||
|
'daniel wainstein / 7 knots, llc',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('normalizeEmail', () => {
|
||||||
|
it('returns null for empty / null inputs', () => {
|
||||||
|
expect(normalizeEmail('')).toBeNull();
|
||||||
|
expect(normalizeEmail(' ')).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lowercases and trims', () => {
|
||||||
|
// From real data: "Arthur@laser-align.com" vs "arthur@laser-align.com" (#183/#686).
|
||||||
|
expect(normalizeEmail('Arthur@laser-align.com')).toBe('arthur@laser-align.com');
|
||||||
|
expect(normalizeEmail(' marcus@example.com ')).toBe('marcus@example.com');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lowercases capitalized localparts', () => {
|
||||||
|
// From real data: "Bmalone850@gmail.com" (#489), "Hef355@yahoo.com" (#533),
|
||||||
|
// "Donclaytonmusic@gmail.com" (#679).
|
||||||
|
expect(normalizeEmail('Bmalone850@gmail.com')).toBe('bmalone850@gmail.com');
|
||||||
|
expect(normalizeEmail('Hef355@yahoo.com')).toBe('hef355@yahoo.com');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves plus-aliases — both legitimate and tricks', () => {
|
||||||
|
// Per design §3.2: "+aliases" are not stripped. Compare by full localpart.
|
||||||
|
expect(normalizeEmail('marcus+sales@example.com')).toBe('marcus+sales@example.com');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null for invalid email shapes', () => {
|
||||||
|
expect(normalizeEmail('not-an-email')).toBeNull();
|
||||||
|
expect(normalizeEmail('@example.com')).toBeNull();
|
||||||
|
expect(normalizeEmail('user@')).toBeNull();
|
||||||
|
expect(normalizeEmail('user@.com')).toBeNull();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('normalizePhone', () => {
|
||||||
|
it('returns null for empty / whitespace / null', () => {
|
||||||
|
expect(normalizePhone('', 'AI')).toBeNull();
|
||||||
|
expect(normalizePhone(' ', 'AI')).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('parses a plain E.164 number', () => {
|
||||||
|
expect(normalizePhone('+15742740548', 'US')).toMatchObject({
|
||||||
|
e164: '+15742740548',
|
||||||
|
country: 'US',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('strips embedded carriage returns and trailing whitespace', () => {
|
||||||
|
// From real data: "+1-264-235-8840\r" (#19), "+1-264-772-3272\r" (#20).
|
||||||
|
const out = normalizePhone('+1-264-235-8840\r', 'AI');
|
||||||
|
expect(out?.e164).toBe('+12642358840');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('strips dashes, dots, parens, single quotes, spaces in a single pass', () => {
|
||||||
|
// From real data: "'+1.214.603.4235" (#205), "574-274-0548" (#236),
|
||||||
|
// "+1-264-235-8840" (#19), "+1 (212) 555-0123" (synthetic).
|
||||||
|
expect(normalizePhone("'+1.214.603.4235", 'US')?.e164).toBe('+12146034235');
|
||||||
|
expect(normalizePhone('574-274-0548', 'US')?.e164).toBe('+15742740548');
|
||||||
|
expect(normalizePhone('+1 (212) 555-0123', 'US')?.e164).toBe('+12125550123');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('converts a leading 00 prefix to + (international dialling)', () => {
|
||||||
|
// From real data: "00447956657022" (#216), "0033651381036" (#702).
|
||||||
|
expect(normalizePhone('00447956657022', 'GB')?.e164).toBe('+447956657022');
|
||||||
|
expect(normalizePhone('0033651381036', 'FR')?.e164).toBe('+33651381036');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses defaultCountry when input has no international prefix', () => {
|
||||||
|
// From real data: "0690699699" (#203, French local), "0651381036" (#701).
|
||||||
|
expect(normalizePhone('0690699699', 'FR')?.e164).toBe('+33690699699');
|
||||||
|
expect(normalizePhone('0651381036', 'FR')?.e164).toBe('+33651381036');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null when there is no prefix AND no defaultCountry', () => {
|
||||||
|
// The migration script flags these for human review.
|
||||||
|
const out = normalizePhone('5742740548');
|
||||||
|
expect(out?.e164 ?? null).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('flags placeholder all-zeros numbers and returns null', () => {
|
||||||
|
// From real data: "+447000000000" (#641, "Milos Vitkovic" — clearly fake).
|
||||||
|
const out = normalizePhone('+447000000000', 'GB');
|
||||||
|
expect(out?.flagged).toBe('placeholder');
|
||||||
|
expect(out?.e164).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('flags multi-number fields and uses the first segment', () => {
|
||||||
|
// From real data: "0677580750/0690511494" (#209). Other separators: ; ,
|
||||||
|
const slash = normalizePhone('0677580750/0690511494', 'FR');
|
||||||
|
expect(slash?.flagged).toBe('multi_number');
|
||||||
|
expect(slash?.e164).toBe('+33677580750');
|
||||||
|
|
||||||
|
const semi = normalizePhone('+33611111111;+33622222222', 'FR');
|
||||||
|
expect(semi?.flagged).toBe('multi_number');
|
||||||
|
expect(semi?.e164).toBe('+33611111111');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('flags genuinely unparseable input as `unparseable`', () => {
|
||||||
|
const out = normalizePhone('xyz-not-a-phone', 'US');
|
||||||
|
expect(out?.flagged).toBe('unparseable');
|
||||||
|
expect(out?.e164).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('strips an apostrophe-prefix without breaking the parse', () => {
|
||||||
|
// From real data: leading "'" copy-pasted from spreadsheets escapes
|
||||||
|
// numeric-cell coercion. Should be invisible to dedup.
|
||||||
|
expect(normalizePhone("'0690699699", 'FR')?.e164).toBe('+33690699699');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns the country alongside the E.164 form', () => {
|
||||||
|
expect(normalizePhone('+33690699699', 'FR')).toMatchObject({
|
||||||
|
e164: '+33690699699',
|
||||||
|
country: 'FR',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('resolveCountry', () => {
|
||||||
|
it('returns null for empty / nullish input', () => {
|
||||||
|
expect(resolveCountry('')).toEqual({ iso: null, confidence: null });
|
||||||
|
expect(resolveCountry(' ')).toEqual({ iso: null, confidence: null });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('exact-matches a canonical English country name', () => {
|
||||||
|
expect(resolveCountry('Anguilla')).toEqual({ iso: 'AI', confidence: 'exact' });
|
||||||
|
expect(resolveCountry('United Kingdom')).toEqual({ iso: 'GB', confidence: 'exact' });
|
||||||
|
expect(resolveCountry('United States')).toEqual({ iso: 'US', confidence: 'exact' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('matches case-insensitively', () => {
|
||||||
|
expect(resolveCountry('anguilla').iso).toBe('AI');
|
||||||
|
expect(resolveCountry('UNITED KINGDOM').iso).toBe('GB');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('matches values with surrounding whitespace', () => {
|
||||||
|
expect(resolveCountry(' United States ').iso).toBe('US');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles diacritic variants of Saint-Barthélemy', () => {
|
||||||
|
// From real data: "Saint barthelemy" (#203), "St Barth" (#208), "Saint-Barthélemy".
|
||||||
|
expect(resolveCountry('Saint-Barthélemy').iso).toBe('BL');
|
||||||
|
expect(resolveCountry('Saint Barthelemy').iso).toBe('BL');
|
||||||
|
expect(resolveCountry('saint barthelemy').iso).toBe('BL');
|
||||||
|
expect(resolveCountry('St Barth').iso).toBe('BL');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('resolves common abbreviations', () => {
|
||||||
|
expect(resolveCountry('USA').iso).toBe('US');
|
||||||
|
expect(resolveCountry('UK').iso).toBe('GB');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to a city → country mapping for high-frequency cities', () => {
|
||||||
|
// From real data: "Kansas City" (#198), "Sag Harbor Y" (#239).
|
||||||
|
expect(resolveCountry('Kansas City').iso).toBe('US');
|
||||||
|
expect(resolveCountry('Sag Harbor Y').iso).toBe('US');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('marks the confidence tier appropriately', () => {
|
||||||
|
expect(resolveCountry('Anguilla').confidence).toBe('exact');
|
||||||
|
expect(resolveCountry('Kansas City').confidence).toBe('city');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null + null for unresolvable values', () => {
|
||||||
|
// Migration script flags these for human review rather than guessing.
|
||||||
|
expect(resolveCountry('asdfghjkl xyz')).toEqual({ iso: null, confidence: null });
|
||||||
|
expect(resolveCountry('Mars')).toEqual({ iso: null, confidence: null });
|
||||||
|
});
|
||||||
|
});
|
||||||
Reference in New Issue
Block a user