Merge feat/dedup-migration: client dedup library + NocoDB migration script + admin queue

# Conflicts:
#	.gitignore
#	src/lib/db/migrations/meta/_journal.json
This commit is contained in:
Matt Ciaccio
2026-05-03 16:24:13 +02:00
32 changed files with 25389 additions and 1 deletion

View File

@@ -0,0 +1,5 @@
import { DuplicatesReviewQueue } from '@/components/admin/duplicates/duplicates-review-queue';

/** /admin/duplicates — thin page wrapper around the merge-candidate review queue. */
const DuplicatesAdminPage = () => <DuplicatesReviewQueue />;

export default DuplicatesAdminPage;

View File

@@ -0,0 +1,4 @@
import { withAuth, withPermission } from '@/lib/api/helpers';
import { dismissHandler } from '../../handlers';

/** POST /api/v1/admin/duplicates/[id]/dismiss — requires the clients:edit permission. */
const guardedDismiss = withPermission('clients', 'edit', dismissHandler);
export const POST = withAuth(guardedDismiss);

View File

@@ -0,0 +1,4 @@
import { withAuth, withPermission } from '@/lib/api/helpers';
import { confirmMergeHandler } from '../../handlers';

/** POST /api/v1/admin/duplicates/[id]/merge — requires the clients:edit permission. */
const guardedMerge = withPermission('clients', 'edit', confirmMergeHandler);
export const POST = withAuth(guardedMerge);

View File

@@ -0,0 +1,160 @@
import { NextResponse } from 'next/server';
import { and, eq, inArray } from 'drizzle-orm';
import type { AuthContext } from '@/lib/api/helpers';
import { db } from '@/lib/db';
import { clients, clientMergeCandidates } from '@/lib/db/schema/clients';
import { errorResponse, NotFoundError } from '@/lib/errors';
import {
listPendingMergeCandidates,
mergeClients,
type MergeFieldChoices,
} from '@/lib/services/client-merge.service';
/**
* GET /api/v1/admin/duplicates
*
* Pending merge candidates for the current port, sorted by score.
* Each row hydrates its two client summaries so the review-queue UI
* can render side-by-side cards without an N+1 fetch.
*/
export async function listHandler(_req: Request, ctx: AuthContext): Promise<NextResponse> {
try {
const pairs = await listPendingMergeCandidates(ctx.portId);
if (pairs.length === 0) return NextResponse.json({ data: [] });
const ids = Array.from(new Set(pairs.flatMap((p) => [p.clientAId, p.clientBId])));
const clientRows = await db
.select({
id: clients.id,
fullName: clients.fullName,
archivedAt: clients.archivedAt,
mergedIntoClientId: clients.mergedIntoClientId,
createdAt: clients.createdAt,
})
.from(clients)
.where(inArray(clients.id, ids));
const clientById = new Map(clientRows.map((c) => [c.id, c]));
const data = pairs
.map((p) => {
const a = clientById.get(p.clientAId);
const b = clientById.get(p.clientBId);
if (!a || !b) return null; // FK orphan — shouldn't happen, but be defensive
// Skip pairs where one side has already been merged or archived.
if (a.mergedIntoClientId || b.mergedIntoClientId) return null;
return {
id: p.id,
score: p.score,
reasons: p.reasons,
createdAt: p.createdAt,
clientA: { id: a.id, fullName: a.fullName, createdAt: a.createdAt },
clientB: { id: b.id, fullName: b.fullName, createdAt: b.createdAt },
};
})
.filter((row): row is NonNullable<typeof row> => row !== null);
return NextResponse.json({ data });
} catch (error) {
return errorResponse(error);
}
}
/**
 * POST /api/v1/admin/duplicates/[id]/merge
 *
 * Body: { winnerId: string, fieldChoices?: MergeFieldChoices }
 *
 * Confirms a pending merge candidate scoped to the caller's port. The
 * winner is the client the reviewer chose to keep; the opposite side of
 * the pair becomes the loser. All merge bookkeeping flows through
 * mergeClients(), the only code path that touches client_merge_log.
 *
 * NOTE(review): this handler does not flip the candidate row's status to
 * 'merged' — presumably mergeClients() handles that (or the list view's
 * mergedIntoClientId filter hides the pair); confirm in the merge service.
 */
export async function confirmMergeHandler(
  req: Request,
  ctx: AuthContext,
  params: { id?: string },
): Promise<NextResponse> {
  try {
    const candidateId = params.id ?? '';
    // Tolerate an empty/invalid JSON body; validation below rejects it.
    const body = (await req.json().catch(() => ({}))) as {
      winnerId?: string;
      fieldChoices?: MergeFieldChoices;
    };
    const winnerId = body.winnerId;
    if (!winnerId) {
      return NextResponse.json({ error: 'winnerId required' }, { status: 400 });
    }

    // Only pending candidates in the caller's port are eligible.
    const rows = await db
      .select()
      .from(clientMergeCandidates)
      .where(
        and(
          eq(clientMergeCandidates.id, candidateId),
          eq(clientMergeCandidates.portId, ctx.portId),
          eq(clientMergeCandidates.status, 'pending'),
        ),
      );
    const candidate = rows[0];
    if (!candidate) throw new NotFoundError('Merge candidate');

    // Whichever side of the pair the winner is, the other side loses.
    let loserId: string | null = null;
    if (winnerId === candidate.clientAId) {
      loserId = candidate.clientBId;
    } else if (winnerId === candidate.clientBId) {
      loserId = candidate.clientAId;
    }
    if (!loserId) {
      return NextResponse.json(
        { error: 'winnerId must match one of the candidate clients' },
        { status: 400 },
      );
    }

    const result = await mergeClients({
      winnerId,
      loserId,
      mergedBy: ctx.userId,
      fieldChoices: body.fieldChoices,
    });
    return NextResponse.json({ data: result });
  } catch (error) {
    return errorResponse(error);
  }
}
/**
 * POST /api/v1/admin/duplicates/[id]/dismiss
 *
 * Marks a pending merge candidate as dismissed, stamping who resolved it
 * and when. The background scoring job skips dismissed pairs on later
 * runs (a future score increase can re-create them).
 */
export async function dismissHandler(
  _req: Request,
  ctx: AuthContext,
  params: { id?: string },
): Promise<NextResponse> {
  try {
    const candidateId = params.id ?? '';
    // Conditional update doubles as the existence check: only a pending
    // candidate in the caller's port produces a returned row.
    const updated = await db
      .update(clientMergeCandidates)
      .set({
        status: 'dismissed',
        resolvedAt: new Date(),
        resolvedBy: ctx.userId,
      })
      .where(
        and(
          eq(clientMergeCandidates.id, candidateId),
          eq(clientMergeCandidates.portId, ctx.portId),
          eq(clientMergeCandidates.status, 'pending'),
        ),
      )
      .returning({ id: clientMergeCandidates.id });
    const first = updated[0];
    if (!first) throw new NotFoundError('Merge candidate');
    return NextResponse.json({ data: { id: first.id, status: 'dismissed' } });
  } catch (error) {
    return errorResponse(error);
  }
}

View File

@@ -0,0 +1,4 @@
import { withAuth, withPermission } from '@/lib/api/helpers';
import { listHandler } from './handlers';

/** GET /api/v1/admin/duplicates — requires the clients:view permission. */
const guardedList = withPermission('clients', 'view', listHandler);
export const GET = withAuth(guardedList);

View File

@@ -0,0 +1,160 @@
import { NextResponse } from 'next/server';
import { and, eq, inArray } from 'drizzle-orm';
import type { AuthContext } from '@/lib/api/helpers';
import { db } from '@/lib/db';
import { clients, clientContacts } from '@/lib/db/schema/clients';
import { interests } from '@/lib/db/schema/interests';
import { errorResponse } from '@/lib/errors';
import { findClientMatches, type MatchCandidate } from '@/lib/dedup/find-matches';
import { normalizeEmail, normalizeName, normalizePhone } from '@/lib/dedup/normalize';
import type { CountryCode } from '@/lib/i18n/countries';
/**
 * GET /api/v1/clients/match-candidates
 *
 * Query parameters (any combination):
 *   email    Free-text email; gets normalized server-side.
 *   phone    Free-text phone; gets normalized to E.164 server-side.
 *   name     Free-text full name; used for surname-token blocking.
 *   country  Optional ISO country hint (default: AI for Port Nimara).
 *
 * Returns the top candidates that scored above the soft-warn threshold,
 * each with a small client summary the form's suggestion card can
 * render. Confidence tiers and rules are applied server-side from the
 * port's `system_settings` (when wired) or sensible defaults otherwise.
 *
 * Used by `useDedupSuggestion` in the new-client form. Debounced on
 * the client; this endpoint must be cheap (single port pool fetch +
 * an in-memory dedup pass).
 */
export async function getMatchCandidatesHandler(
  req: Request,
  ctx: AuthContext,
): Promise<NextResponse> {
  try {
    const url = new URL(req.url);
    const rawEmail = url.searchParams.get('email');
    const rawPhone = url.searchParams.get('phone');
    const rawName = url.searchParams.get('name');
    const country = (url.searchParams.get('country') ?? 'AI') as CountryCode;
    const email = rawEmail ? normalizeEmail(rawEmail) : null;
    const phoneResult = rawPhone ? normalizePhone(rawPhone, country) : null;
    const nameResult = rawName ? normalizeName(rawName) : null;
    // If the caller didn't give us anything useful to match on, return empty
    // — short-circuit rather than scan every client for nothing.
    if (!email && !phoneResult?.e164 && !nameResult?.surnameToken) {
      return NextResponse.json({ data: [] });
    }
    // Build the input candidate.
    const input: MatchCandidate = {
      id: '__incoming__',
      fullName: nameResult?.display ?? null,
      surnameToken: nameResult?.surnameToken ?? null,
      emails: email ? [email] : [],
      phonesE164: phoneResult?.e164 ? [phoneResult.e164] : [],
      countryIso: country,
    };
    // Fetch the pool for this port. We keep this O(N) over clients
    // since the dedup library does its own blocking; for ports with
    // thousands of clients we can later restrict by surname-token /
    // contact lookups, but for current scale the simple full-pool fetch
    // is fine.
    const liveClients = await db
      .select({
        id: clients.id,
        fullName: clients.fullName,
        nationalityIso: clients.nationalityIso,
        archivedAt: clients.archivedAt,
        mergedIntoClientId: clients.mergedIntoClientId,
      })
      .from(clients)
      .where(and(eq(clients.portId, ctx.portId)));
    // Fix: previously every client in the port was eligible, so the form
    // could suggest attaching to a client that had already been archived
    // or merged away. Filter those out in memory — cheaper than widening
    // the SQL predicate at current pool sizes.
    const activeClients = liveClients.filter((c) => !c.archivedAt && !c.mergedIntoClientId);
    if (activeClients.length === 0) {
      return NextResponse.json({ data: [] });
    }
    const clientIds = activeClients.map((c) => c.id);
    const contactRows = await db
      .select({
        clientId: clientContacts.clientId,
        channel: clientContacts.channel,
        value: clientContacts.value,
        valueE164: clientContacts.valueE164,
      })
      .from(clientContacts)
      .where(inArray(clientContacts.clientId, clientIds));
    // Group contacts by client for the candidate map.
    const emailsByClient = new Map<string, string[]>();
    const phonesByClient = new Map<string, string[]>();
    for (const c of contactRows) {
      if (c.channel === 'email') {
        const arr = emailsByClient.get(c.clientId) ?? [];
        arr.push(c.value.toLowerCase());
        emailsByClient.set(c.clientId, arr);
      } else if (c.channel === 'phone' || c.channel === 'whatsapp') {
        if (c.valueE164) {
          const arr = phonesByClient.get(c.clientId) ?? [];
          arr.push(c.valueE164);
          phonesByClient.set(c.clientId, arr);
        }
      }
    }
    const pool: MatchCandidate[] = activeClients.map((c) => {
      const named = normalizeName(c.fullName);
      return {
        id: c.id,
        fullName: c.fullName,
        surnameToken: named.surnameToken ?? null,
        emails: emailsByClient.get(c.id) ?? [],
        phonesE164: phonesByClient.get(c.id) ?? [],
        countryIso: (c.nationalityIso as CountryCode | null) ?? null,
      };
    });
    const matches = findClientMatches(input, pool, {
      highScore: 90,
      mediumScore: 50,
    });
    // Only return medium+ — low-confidence noise isn't useful at the
    // create-form layer (background scoring queue picks those up).
    const useful = matches.filter((m) => m.confidence !== 'low');
    if (useful.length === 0) {
      return NextResponse.json({ data: [] });
    }
    // Pull a quick summary for each surfaced candidate so the suggestion
    // card has enough to render ("Marcus Laurent · 2 interests · last
    // contact 9d ago").
    const summarizedIds = useful.map((m) => m.candidate.id);
    const interestCounts = await db
      .select({ clientId: interests.clientId })
      .from(interests)
      .where(inArray(interests.clientId, summarizedIds));
    const interestsByClient = new Map<string, number>();
    for (const r of interestCounts) {
      interestsByClient.set(r.clientId, (interestsByClient.get(r.clientId) ?? 0) + 1);
    }
    const data = useful.map((m) => ({
      clientId: m.candidate.id,
      fullName: m.candidate.fullName,
      score: m.score,
      confidence: m.confidence,
      reasons: m.reasons,
      interestCount: interestsByClient.get(m.candidate.id) ?? 0,
      emails: m.candidate.emails,
      phonesE164: m.candidate.phonesE164,
    }));
    return NextResponse.json({ data });
  } catch (error) {
    return errorResponse(error);
  }
}

View File

@@ -0,0 +1,4 @@
import { withAuth, withPermission } from '@/lib/api/helpers';
import { getMatchCandidatesHandler } from './handlers';

/** GET /api/v1/clients/match-candidates — requires the clients:view permission. */
const guardedMatch = withPermission('clients', 'view', getMatchCandidatesHandler);
export const GET = withAuth(guardedMatch);

View File

@@ -0,0 +1,215 @@
'use client';
import { useState } from 'react';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { ArrowRight, GitMerge, X } from 'lucide-react';
import { toast } from 'sonner';
import { Button } from '@/components/ui/button';
import { PageHeader } from '@/components/shared/page-header';
import { EmptyState } from '@/components/shared/empty-state';
import { Skeleton } from '@/components/ui/skeleton';
import { apiFetch } from '@/lib/api/client';
import { cn } from '@/lib/utils';
// Shape of one row from GET /api/v1/admin/duplicates (see listHandler).
interface CandidatePair {
  id: string;
  // Dedup score assigned by the background scoring job.
  score: number;
  // Human-readable rule list, e.g. ["email match", "phone match"].
  reasons: string[];
  // ISO timestamp of when the pair was flagged.
  createdAt: string;
  clientA: { id: string; fullName: string; createdAt: string };
  clientB: { id: string; fullName: string; createdAt: string };
}
/**
 * Review queue for pairs flagged by the dedup background scoring job.
 *
 * Fetches every pending merge candidate (pairs scoring >=
 * `dedup_review_queue_threshold`) and renders one row per pair. From a
 * row the admin can either confirm a merge by picking a winner via the
 * side-by-side cards, or dismiss the pair (a later score increase
 * re-creates it on the next scoring run).
 *
 * Deliberately minimal merge UI here: winner selection only, no
 * per-field choice; the loser archives. A richer side-by-side
 * field-merge dialog is a future enhancement.
 */
export function DuplicatesReviewQueue() {
  const queryClient = useQueryClient();
  const { data, isLoading } = useQuery<{ data: CandidatePair[] }>({
    queryKey: ['admin', 'duplicates'],
    queryFn: () => apiFetch<{ data: CandidatePair[] }>('/api/v1/admin/duplicates'),
  });
  const pairs = data?.data ?? [];

  // Header copy reflects the live pair count.
  const description =
    pairs.length === 0
      ? 'No pending pairs to review.'
      : `${pairs.length} pair${pairs.length === 1 ? '' : 's'} flagged for review.`;

  // Pick the body once: skeletons while loading, empty state, or the list.
  const content = isLoading ? (
    <div className="space-y-3">
      {[0, 1, 2].map((i) => (
        <Skeleton key={i} className="h-32 w-full" />
      ))}
    </div>
  ) : pairs.length === 0 ? (
    <EmptyState
      title="All clear"
      description="The background scoring job hasn't surfaced any potential duplicates yet."
    />
  ) : (
    <ul className="space-y-3">
      {pairs.map((pair) => (
        <li key={pair.id}>
          <CandidateRow pair={pair} queryClient={queryClient} />
        </li>
      ))}
    </ul>
  );

  return (
    <div className="space-y-4">
      <PageHeader title="Duplicate clients" description={description} />
      {content}
    </div>
  );
}
/**
 * One pending duplicate pair in the review queue.
 *
 * The admin selects which side to keep (defaults to client A), then
 * either merges — the unselected side becomes the loser — or dismisses
 * the pair. `queryClient` is passed from the parent so both mutations
 * can invalidate the queue and client lists after success.
 */
function CandidateRow({
  pair,
  queryClient,
}: {
  pair: CandidatePair;
  queryClient: ReturnType<typeof useQueryClient>;
}) {
  // Disables both buttons while either request is in flight; cleared in
  // onSettled regardless of outcome.
  const [busy, setBusy] = useState<'merge' | 'dismiss' | null>(null);
  // The "keep" side; defaults to client A.
  const [winnerId, setWinnerId] = useState<string>(pair.clientA.id);
  const mergeMutation = useMutation({
    mutationFn: () =>
      apiFetch(`/api/v1/admin/duplicates/${pair.id}/merge`, {
        method: 'POST',
        body: { winnerId },
      }),
    onSuccess: () => {
      // Derive both display names from the current winner selection for
      // the confirmation toast.
      const loserName =
        winnerId === pair.clientA.id ? pair.clientB.fullName : pair.clientA.fullName;
      const winnerName =
        winnerId === pair.clientA.id ? pair.clientA.fullName : pair.clientB.fullName;
      toast.success(`Merged "${loserName}" into "${winnerName}"`);
      // Refresh the queue and any client lists that may show merged rows.
      queryClient.invalidateQueries({ queryKey: ['admin', 'duplicates'] });
      queryClient.invalidateQueries({ queryKey: ['clients'] });
    },
    onError: (err) => toast.error(err instanceof Error ? err.message : 'Merge failed'),
    onSettled: () => setBusy(null),
  });
  const dismissMutation = useMutation({
    mutationFn: () => apiFetch(`/api/v1/admin/duplicates/${pair.id}/dismiss`, { method: 'POST' }),
    onSuccess: () => {
      toast.message('Dismissed');
      queryClient.invalidateQueries({ queryKey: ['admin', 'duplicates'] });
    },
    onError: (err) => toast.error(err instanceof Error ? err.message : 'Dismiss failed'),
    onSettled: () => setBusy(null),
  });
  return (
    <div className="rounded-lg border bg-card p-4">
      {/* Score badge + rule reasons on the left, flagged date on the right. */}
      <div className="mb-3 flex items-baseline justify-between gap-3">
        <div>
          <span className="rounded-full bg-muted px-2 py-0.5 text-[10px] font-medium uppercase tracking-wide text-muted-foreground">
            score {pair.score}
          </span>{' '}
          <span className="text-xs text-muted-foreground">{pair.reasons.join(' · ')}</span>
        </div>
        <span className="text-xs text-muted-foreground">
          flagged {new Date(pair.createdAt).toLocaleDateString()}
        </span>
      </div>
      {/* Side-by-side cards; clicking a card marks it as the winner. */}
      <div className="grid gap-3 sm:grid-cols-[1fr_auto_1fr]">
        <ClientCard
          client={pair.clientA}
          isSelected={winnerId === pair.clientA.id}
          onSelect={() => setWinnerId(pair.clientA.id)}
        />
        <div className="flex items-center justify-center text-muted-foreground">
          <ArrowRight className="size-4" aria-hidden />
        </div>
        <ClientCard
          client={pair.clientB}
          isSelected={winnerId === pair.clientB.id}
          onSelect={() => setWinnerId(pair.clientB.id)}
        />
      </div>
      <div className="mt-3 flex flex-wrap items-center gap-2">
        <Button
          size="sm"
          onClick={() => {
            setBusy('merge');
            mergeMutation.mutate();
          }}
          disabled={busy !== null}
        >
          <GitMerge className="mr-1 size-3.5" aria-hidden />
          Merge into selected
        </Button>
        <Button
          size="sm"
          variant="ghost"
          onClick={() => {
            setBusy('dismiss');
            dismissMutation.mutate();
          }}
          disabled={busy !== null}
        >
          <X className="mr-1 size-3.5" aria-hidden />
          Dismiss
        </Button>
        <p className="text-xs text-muted-foreground">
          The unselected card becomes the loser; its interests + contacts move to the selected
          client and the original is archived.
        </p>
      </div>
    </div>
  );
}
/**
 * Selectable summary card for one side of a duplicate pair. The selected
 * card is visually highlighted and tagged KEEP; clicking the other card
 * moves the selection.
 */
function ClientCard({
  client,
  isSelected,
  onSelect,
}: {
  client: CandidatePair['clientA'];
  isSelected: boolean;
  onSelect: () => void;
}) {
  const createdLabel = new Date(client.createdAt).toLocaleDateString();
  const stateClasses = isSelected
    ? 'border-primary bg-primary/5 ring-1 ring-primary/30'
    : 'border-border hover:bg-muted/40';
  return (
    <button
      type="button"
      onClick={onSelect}
      className={cn('rounded-md border p-3 text-left transition-colors', stateClasses)}
    >
      <p className="text-sm font-medium">{client.fullName}</p>
      <p className="mt-0.5 text-[11px] text-muted-foreground">Created {createdLabel}</p>
      {isSelected && (
        <span className="mt-1 inline-block rounded-full bg-primary/10 px-1.5 py-0.5 text-[10px] font-semibold text-primary">
          KEEP
        </span>
      )}
    </button>
  );
}

View File

@@ -23,6 +23,7 @@ import { TagPicker } from '@/components/shared/tag-picker';
import { CountryCombobox } from '@/components/shared/country-combobox';
import { TimezoneCombobox } from '@/components/shared/timezone-combobox';
import { PhoneInput } from '@/components/shared/phone-input';
import { DedupSuggestionPanel } from '@/components/clients/dedup-suggestion-panel';
import { apiFetch } from '@/lib/api/client';
import { createClientSchema, type CreateClientInput } from '@/lib/validators/clients';
import type { CountryCode } from '@/lib/i18n/countries';
@@ -30,6 +31,12 @@ import type { CountryCode } from '@/lib/i18n/countries';
interface ClientFormProps {
open: boolean;
onOpenChange: (open: boolean) => void;
/** Optional callback fired when the dedup suggestion panel reports
* the user picked an existing client. The form closes; parent is
* responsible for navigating to the existing client's detail page
* or opening the create-interest dialog pre-filled with that
* clientId. Skipped in edit mode. */
onUseExistingClient?: (clientId: string) => void;
/** If provided, form is in edit mode */
client?: {
id: string;
@@ -53,7 +60,7 @@ interface ClientFormProps {
};
}
export function ClientForm({ open, onOpenChange, client }: ClientFormProps) {
export function ClientForm({ open, onOpenChange, client, onUseExistingClient }: ClientFormProps) {
const queryClient = useQueryClient();
const isEdit = !!client;
@@ -143,6 +150,26 @@ export function ClientForm({ open, onOpenChange, client }: ClientFormProps) {
</SheetHeader>
<form onSubmit={handleSubmit((data) => mutation.mutate(data))} className="space-y-6 py-6">
{/* Dedup suggestion — only on the create path. Watches the
live form values for email / phone / name and surfaces
an existing client when one matches. The user can
attach the new interest to that client instead of
creating a duplicate. */}
{!isEdit ? (
<DedupSuggestionPanel
email={watch('contacts')?.find((c) => c?.channel === 'email')?.value ?? null}
phone={
watch('contacts')?.find((c) => c?.channel === 'phone' || c?.channel === 'whatsapp')
?.valueE164 ?? null
}
name={watch('fullName') ?? null}
onUseExisting={(match) => {
onUseExistingClient?.(match.clientId);
onOpenChange(false);
}}
/>
) : null}
{/* Basic Info */}
<div className="space-y-4">
<h3 className="text-sm font-medium text-muted-foreground uppercase tracking-wide">

View File

@@ -0,0 +1,183 @@
'use client';
import { useEffect, useState } from 'react';
import { useQuery } from '@tanstack/react-query';
import { AlertCircle, ArrowRight, Briefcase, X } from 'lucide-react';
import { Button } from '@/components/ui/button';
import { apiFetch } from '@/lib/api/client';
import { cn } from '@/lib/utils';
// One scored candidate from GET /api/v1/clients/match-candidates.
interface MatchData {
  clientId: string;
  fullName: string;
  // Dedup score; confidence tier is derived server-side from thresholds.
  score: number;
  confidence: 'high' | 'medium' | 'low';
  // Human-readable rule list, e.g. ["email match", "phone match"].
  reasons: string[];
  // Number of interests attached to the candidate client.
  interestCount: number;
  emails: string[];
  phonesE164: string[];
}
interface DedupSuggestionPanelProps {
  /** Free-text inputs from the in-flight new-client form. The panel
   * debounces them and queries /api/v1/clients/match-candidates. */
  email?: string | null;
  phone?: string | null;
  name?: string | null;
  /** Caller wants to attach the new interest to an existing client
   * rather than creating a new one. The form switches to
   * interest-only mode and pre-fills the client. */
  onUseExisting: (match: MatchData) => void;
  /** User explicitly said "create new anyway." Hide the panel until
   * they change input again. */
  onDismiss?: () => void;
}
/**
* Surfaces existing clients that match the form's in-flight inputs.
*
* Renders nothing while inputs are short / no useful match found.
* On a high-confidence match, the panel interrupts visually with a
* solid border and a primary "Use this client" button.
*
* Wired into the new-client form. Skipped in edit mode.
*/
export function DedupSuggestionPanel({
  email,
  phone,
  name,
  onUseExisting,
  onDismiss,
}: DedupSuggestionPanelProps) {
  // True after "Create new anyway"; hides the panel and disables the query
  // until the debounce effect observes a new keystroke.
  const [dismissed, setDismissed] = useState(false);
  // Debounce inputs by 300ms so we don't fire on every keystroke. Keep
  // the latest debounced values in component state.
  const [debounced, setDebounced] = useState({
    email: email ?? '',
    phone: phone ?? '',
    name: name ?? '',
  });
  useEffect(() => {
    const t = setTimeout(() => {
      setDebounced({ email: email ?? '', phone: phone ?? '', name: name ?? '' });
      // Clear the dismissed flag when inputs change — the user typed
      // something new, so the prior dismissal no longer applies.
      setDismissed(false);
    }, 300);
    // Restarting the timer on every input change is what makes this a debounce.
    return () => clearTimeout(t);
  }, [email, phone, name]);
  // Minimum-signal gate: don't query until at least one input is long
  // enough to be worth matching on.
  const hasSomething =
    debounced.email.length > 3 || debounced.phone.length > 3 || debounced.name.length > 2;
  const { data, isFetching } = useQuery<{ data: MatchData[] }>({
    // Debounced values in the key mean a new request only after they settle.
    queryKey: ['dedup-match-candidates', debounced],
    queryFn: () => {
      const params = new URLSearchParams();
      if (debounced.email) params.set('email', debounced.email);
      if (debounced.phone) params.set('phone', debounced.phone);
      if (debounced.name) params.set('name', debounced.name);
      return apiFetch<{ data: MatchData[] }>(`/api/v1/clients/match-candidates?${params}`);
    },
    enabled: hasSomething && !dismissed,
    // Same query is fine to cache for a minute — moves are slow at this layer.
    staleTime: 60_000,
  });
  // Render-nothing contract: stay invisible while dismissed, under-input,
  // still fetching the first result, or when nothing matched.
  if (dismissed) return null;
  if (!hasSomething) return null;
  if (isFetching && !data) return null;
  const matches = data?.data ?? [];
  if (matches.length === 0) return null;
  // Matches arrive sorted by the server; surface only the top one inline.
  const top = matches[0]!;
  const isHigh = top.confidence === 'high';
  return (
    <div
      className={cn(
        'rounded-lg border p-3 mb-3 transition-colors',
        isHigh
          ? 'border-amber-300 bg-amber-50/60 dark:bg-amber-950/30'
          : 'border-border bg-muted/40',
      )}
      data-testid="dedup-suggestion"
    >
      <div className="flex items-start gap-3">
        <div className="mt-0.5">
          <AlertCircle
            className={cn(
              'size-5',
              isHigh ? 'text-amber-700 dark:text-amber-400' : 'text-muted-foreground',
            )}
            aria-hidden
          />
        </div>
        <div className="min-w-0 flex-1">
          <p className="text-sm font-semibold leading-tight">
            {isHigh
              ? 'This looks like an existing client'
              : 'Possible match — check before creating'}
          </p>
          {/* Summary card for the top match: name, contact points, interest count. */}
          <div className="mt-2 rounded-md border bg-background/80 p-2.5">
            <div className="flex items-center gap-2">
              <p className="truncate text-sm font-medium">{top.fullName}</p>
              <span
                className={cn(
                  'shrink-0 rounded-full px-1.5 py-0.5 text-[10px] font-medium uppercase tracking-wide',
                  isHigh
                    ? 'bg-amber-200 text-amber-900 dark:bg-amber-800 dark:text-amber-100'
                    : 'bg-muted text-muted-foreground',
                )}
              >
                {top.confidence}
              </span>
            </div>
            <div className="mt-0.5 flex flex-wrap items-center gap-x-3 gap-y-0.5 text-xs text-muted-foreground">
              {top.emails[0] ? <span className="truncate">{top.emails[0]}</span> : null}
              {top.phonesE164[0] ? <span>{top.phonesE164[0]}</span> : null}
              <span className="inline-flex items-center gap-1">
                <Briefcase className="size-3" aria-hidden />
                {top.interestCount} {top.interestCount === 1 ? 'interest' : 'interests'}
              </span>
            </div>
            <p className="mt-1.5 text-[11px] text-muted-foreground">{top.reasons.join(' · ')}</p>
          </div>
          <div className="mt-3 flex flex-wrap items-center gap-2">
            <Button
              type="button"
              size="sm"
              onClick={() => onUseExisting(top)}
              data-testid="dedup-use-existing"
            >
              Use this client
              <ArrowRight className="ml-1 size-3.5" aria-hidden />
            </Button>
            <Button
              type="button"
              size="sm"
              variant="ghost"
              onClick={() => {
                setDismissed(true);
                onDismiss?.();
              }}
              data-testid="dedup-dismiss"
            >
              <X className="mr-1 size-3.5" aria-hidden />
              Create new anyway
            </Button>
            {matches.length > 1 ? (
              <span className="text-xs text-muted-foreground">
                +{matches.length - 1} other possible{' '}
                {matches.length - 1 === 1 ? 'match' : 'matches'}
              </span>
            ) : null}
          </div>
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,30 @@
-- Potential-duplicate client pairs flagged by the background scoring job.
-- Read by the /admin/duplicates review queue; "status" is one of
-- 'pending' | 'dismissed' | 'merged' (see the drizzle schema for lifecycle).
CREATE TABLE "client_merge_candidates" (
	"id" text PRIMARY KEY NOT NULL,
	"port_id" text NOT NULL,
	"client_a_id" text NOT NULL,
	"client_b_id" text NOT NULL,
	"score" integer NOT NULL,
	"reasons" jsonb NOT NULL,
	"status" text DEFAULT 'pending' NOT NULL,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
	"resolved_at" timestamp with time zone,
	"resolved_by" text
);
--> statement-breakpoint
-- Idempotency ledger for one-shot external-data migrations (e.g. NocoDB):
-- maps (source_system, source_id, target_entity_type) to the new entity.
CREATE TABLE "migration_source_links" (
	"id" text PRIMARY KEY NOT NULL,
	"source_system" text NOT NULL,
	"source_id" text NOT NULL,
	"target_entity_type" text NOT NULL,
	"target_entity_id" text NOT NULL,
	"applied_id" text NOT NULL,
	"applied_by" text,
	"applied_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_port_id_ports_id_fk" FOREIGN KEY ("port_id") REFERENCES "public"."ports"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_client_a_id_clients_id_fk" FOREIGN KEY ("client_a_id") REFERENCES "public"."clients"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "client_merge_candidates" ADD CONSTRAINT "client_merge_candidates_client_b_id_clients_id_fk" FOREIGN KEY ("client_b_id") REFERENCES "public"."clients"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "idx_cmc_port_status" ON "client_merge_candidates" USING btree ("port_id","status");--> statement-breakpoint
CREATE UNIQUE INDEX "idx_cmc_pair" ON "client_merge_candidates" USING btree ("port_id","client_a_id","client_b_id");--> statement-breakpoint
CREATE UNIQUE INDEX "idx_msl_source_target" ON "migration_source_links" USING btree ("source_system","source_id","target_entity_type");

View File

@@ -0,0 +1,2 @@
-- Dedup support: a merged ("loser") client points at its surviving client,
-- with an index so lookups by survivor stay cheap. Null for live clients.
ALTER TABLE "clients" ADD COLUMN "merged_into_client_id" text;--> statement-breakpoint
CREATE INDEX "idx_clients_merged_into" ON "clients" USING btree ("merged_into_client_id");

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -148,6 +148,20 @@
"when": 1777814682110,
"tag": "0020_medical_betty_brant",
"breakpoints": true
},
{
"idx": 21,
"version": "7",
"when": 1777811835982,
"tag": "0021_unusual_azazel",
"breakpoints": true
},
{
"idx": 22,
"version": "7",
"when": 1777812671833,
"tag": "0022_magenta_madame_hydra",
"breakpoints": true
}
]
}

View File

@@ -2,6 +2,7 @@ import {
pgTable,
text,
boolean,
integer,
timestamp,
jsonb,
index,
@@ -30,6 +31,11 @@ export const clients = pgTable(
source: text('source'), // website, manual, referral, broker
sourceDetails: text('source_details'),
archivedAt: timestamp('archived_at', { withTimezone: true }),
/** When this client was merged into another (the "loser" of a dedup
* merge), this points at the surviving client. Used by the
* /admin/duplicates review queue to redirect any stragglers, and by
* the unmerge flow to restore. Null for live clients. */
mergedIntoClientId: text('merged_into_client_id'),
createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(),
},
@@ -38,6 +44,7 @@ export const clients = pgTable(
index('idx_clients_name').on(table.portId, table.fullName),
index('idx_clients_archived').on(table.portId, table.archivedAt),
index('idx_clients_nationality_iso').on(table.nationalityIso),
index('idx_clients_merged_into').on(table.mergedIntoClientId),
],
);
@@ -145,6 +152,54 @@ export const clientMergeLog = pgTable(
(table) => [index('idx_cml_port').on(table.portId)],
);
/**
* Pairs of clients flagged by the background scoring job as potential
* duplicates. The `/admin/duplicates` review queue reads from here.
*
* Lifecycle:
* - Background job inserts a row when a pair scores >= the
* `dedup_review_queue_threshold` system setting.
* - User reviews in the admin UI and either merges (status='merged')
* or dismisses (status='dismissed').
* - Subsequent runs of the scoring job skip pairs already
* `dismissed` so the same false-positive doesn't keep reappearing.
* A future score increase recreates the row.
*
* Pairs are stored canonically with `clientAId < clientBId` (string
* comparison) so the same pair only generates one row regardless of
* scoring direction.
*/
export const clientMergeCandidates = pgTable(
  'client_merge_candidates',
  {
    id: text('id')
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    portId: text('port_id')
      .notNull()
      .references(() => ports.id),
    // Cascade: deleting either client deletes the pending pair with it.
    clientAId: text('client_a_id')
      .notNull()
      .references(() => clients.id, { onDelete: 'cascade' }),
    clientBId: text('client_b_id')
      .notNull()
      .references(() => clients.id, { onDelete: 'cascade' }),
    score: integer('score').notNull(),
    /** Human-readable rule list, e.g. ["email match", "phone match"]. */
    reasons: jsonb('reasons').notNull(),
    status: text('status').notNull().default('pending'), // pending | dismissed | merged
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    // Stamped when the pair is resolved; the dismiss handler sets
    // resolvedBy to the acting user's id (ctx.userId).
    resolvedAt: timestamp('resolved_at', { withTimezone: true }),
    resolvedBy: text('resolved_by'),
  },
  (table) => [
    index('idx_cmc_port_status').on(table.portId, table.status),
    // Same pair shouldn't surface twice — enforce uniqueness on the
    // canonical (a < b) ordering.
    uniqueIndex('idx_cmc_pair').on(table.portId, table.clientAId, table.clientBId),
  ],
);
export const clientAddresses = pgTable(
'client_addresses',
{
@@ -190,3 +245,5 @@ export type ClientMergeLog = typeof clientMergeLog.$inferSelect;
export type NewClientMergeLog = typeof clientMergeLog.$inferInsert;
export type ClientAddress = typeof clientAddresses.$inferSelect;
export type NewClientAddress = typeof clientAddresses.$inferInsert;
export type ClientMergeCandidate = typeof clientMergeCandidates.$inferSelect;
export type NewClientMergeCandidate = typeof clientMergeCandidates.$inferInsert;

View File

@@ -56,5 +56,8 @@ export * from './ai-usage';
// GDPR export tracking (Phase 3d)
export * from './gdpr';
// Migration ledger (one-shot scripts — NocoDB import etc.)
export * from './migration';
// Relations (must come last — references all tables)
export * from './relations';

View File

@@ -0,0 +1,48 @@
import { pgTable, text, timestamp, uniqueIndex } from 'drizzle-orm/pg-core';
/**
* Idempotency ledger for one-shot data migrations from external sources
* (e.g. the legacy NocoDB Interests table).
*
* Every entity created during a migration script's `--apply` run gets a
* row here mapping the source-system row identifier to the new-system
* entity id. Re-running `--apply` against the same report skips rows
* already linked, so partial-failure resumption is just "run again."
*
* One source row can generate multiple new entities (e.g. one NocoDB
* Interests row → one client + one interest + one yacht), so the
* uniqueness constraint includes `target_entity_type`.
*/
export const migrationSourceLinks = pgTable(
  'migration_source_links',
  {
    id: text('id')
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    /** e.g. 'nocodb_interests', 'nocodb_residences', 'nocodb_website_submissions'. */
    sourceSystem: text('source_system').notNull(),
    /** Source row identifier as a string (NocoDB IDs are integers; we keep
     * text here for forward compat with other sources). */
    sourceId: text('source_id').notNull(),
    /** e.g. 'client', 'interest', 'yacht', 'document'. */
    targetEntityType: text('target_entity_type').notNull(),
    /** UUID of the new-system entity (clients.id, interests.id, etc.). */
    targetEntityId: text('target_entity_id').notNull(),
    /** Apply-id from the migration run that created this link — pairs with
     * the on-disk apply manifest so `--rollback --apply-id <id>` knows
     * exactly which links to remove. */
    appliedId: text('applied_id').notNull(),
    appliedBy: text('applied_by'),
    appliedAt: timestamp('applied_at', { withTimezone: true }).notNull().defaultNow(),
  },
  (table) => [
    // One link per (source row, target entity type): a single source row may
    // fan out into several entities, but never two of the same type.
    uniqueIndex('idx_msl_source_target').on(
      table.sourceSystem,
      table.sourceId,
      table.targetEntityType,
    ),
  ],
);
// Row type as read back from the DB.
export type MigrationSourceLink = typeof migrationSourceLinks.$inferSelect;
// Insert payload (defaulted columns such as `id` / `appliedAt` optional).
export type NewMigrationSourceLink = typeof migrationSourceLinks.$inferInsert;

View File

@@ -0,0 +1,255 @@
/**
* Client-match finder — pure scoring logic.
*
* Compares one input candidate against a pool of existing candidates and
* returns scored matches. Used by:
* - the at-create suggestion in client/interest forms (Layer 1)
* - the public-form auto-link path (when score >= block threshold)
* - the nightly background scoring job (Layer 3)
* - the migration script's dedup pass
*
* Performance shape: blocking via email / phone / surname-token reduces
* the pairwise scan from O(n²) to ~O(n) for any pool size we'll see in
* production. See `findClientMatches` for the blocking implementation.
*
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §4.
*/
import { parsePhoneScriptSafe as parsePhone } from './phone-parse';
import { levenshtein } from './normalize';
// ─── Types ──────────────────────────────────────────────────────────────────
/** Minimal, pre-normalized projection of a client (or not-yet-created row)
 * that the matcher scores. Normalization happens in the caller. */
export interface MatchCandidate {
  id: string;
  fullName: string | null;
  /** Lowercased last non-particle token from `normalizeName(...).surnameToken`.
   * Used as a blocking key. */
  surnameToken: string | null;
  /** Already lowercased + validated via `normalizeEmail`. */
  emails: string[];
  /** Already canonical E.164 via `normalizePhone`. */
  phonesE164: string[];
  /** Address country (NOT phone country) — used for tiebreaking, not scoring. */
  countryIso: string | null;
}
/** Tier derived from `score` via the caller-supplied `DedupThresholds`. */
export type MatchConfidence = 'high' | 'medium' | 'low';
export interface MatchResult {
  candidate: MatchCandidate;
  /** 0–100 after capping. */
  score: number;
  /** Human-readable list of which rules contributed. Useful for the
   * review queue UI ("matched on email + phone + surname token"). */
  reasons: string[];
  confidence: MatchConfidence;
}
export interface DedupThresholds {
  /** Inclusive lower bound for `'high'` confidence. */
  highScore: number;
  /** Inclusive lower bound for `'medium'` confidence. Below this is `'low'`. */
  mediumScore: number;
}
// ─── Public entry point ─────────────────────────────────────────────────────
/**
 * Score `input` against every member of `pool` that survives blocking and
 * return the results sorted by score descending. Low-confidence hits are
 * included — callers filter by `confidence` or `score` as appropriate.
 *
 * Self-matches (a pool entry with `id === input.id`, e.g. when re-scoring
 * an existing client in a background job) are excluded.
 */
export function findClientMatches(
  input: MatchCandidate,
  pool: MatchCandidate[],
  thresholds: DedupThresholds,
): MatchResult[] {
  if (pool.length === 0) return [];

  // Blocking indexes: a pool entry is only worth scoring when it shares at
  // least one email, phone, or surname token with the input. Anything that
  // shares none is structurally too different to be a duplicate and is
  // skipped — which is what keeps the scan ~O(n) instead of O(n²).
  const emailIndex = new Map<string, MatchCandidate[]>();
  const phoneIndex = new Map<string, MatchCandidate[]>();
  const surnameIndex = new Map<string, MatchCandidate[]>();
  for (const entry of pool) {
    if (entry.id === input.id) continue; // never compare a record to itself
    entry.emails.forEach((e) => pushTo(emailIndex, e, entry));
    entry.phonesE164.forEach((p) => pushTo(phoneIndex, p, entry));
    if (entry.surnameToken) pushTo(surnameIndex, entry.surnameToken, entry);
  }

  // Gather the comparison set, deduped by candidate id.
  const toCompare = new Map<string, MatchCandidate>();
  const collect = (hits: MatchCandidate[] | undefined): void => {
    for (const hit of hits ?? []) toCompare.set(hit.id, hit);
  };
  input.emails.forEach((e) => collect(emailIndex.get(e)));
  input.phonesE164.forEach((p) => collect(phoneIndex.get(p)));
  if (input.surnameToken) collect(surnameIndex.get(input.surnameToken));

  // Score survivors, sort by score descending, then attach confidence tiers.
  const scored = [...toCompare.values()].map((candidate) => scorePair(input, candidate));
  scored.sort((a, b) => b.score - a.score);
  for (const result of scored) {
    result.confidence = classify(result.score, thresholds);
  }
  return scored;
}
// ─── Scoring ────────────────────────────────────────────────────────────────
/**
 * Score one (input, candidate) pair against the rule set in design §4.2.
 * Positive rules add, negative rules subtract; the total is clamped to
 * [0, 100]. Reasons accumulate in rule-firing order so the review-queue
 * UI can display "matched on email + phone".
 */
function scorePair(a: MatchCandidate, b: MatchCandidate): MatchResult {
  let total = 0;
  const reasons: string[] = [];

  // Positive: a shared (already-validated) email address.
  const hasSharedEmail = a.emails.some((e) => b.emails.includes(e));
  if (hasSharedEmail) {
    total += 60;
    reasons.push('email match');
  }

  // Positive: a shared E.164 phone carrying at least 8 digits — shorter
  // numbers collide too easily to be trusted.
  const hasSharedPhone = a.phonesE164.some(
    (p) => b.phonesE164.includes(p) && countDigits(p) >= 8,
  );
  if (hasSharedPhone) {
    total += 50;
    reasons.push('phone match');
  }

  // Positive: exact full-name match (case/outer-whitespace insensitive).
  const nameA = (a.fullName ?? '').toLowerCase().trim();
  const nameB = (b.fullName ?? '').toLowerCase().trim();
  const sameName = nameA.length > 0 && nameA === nameB;
  if (sameName) {
    total += 20;
    reasons.push('name match');
  }

  // Positive: identical surname token + given names within edit distance 1.
  // Only fires when the names are NOT exactly equal, to avoid double
  // counting with the rule above. Catches 'Constanzo'/'Costanzo',
  // 'Marc'/'Marcus' etc. when other contact signals confirm them.
  if (!sameName && a.surnameToken && b.surnameToken && a.surnameToken === b.surnameToken) {
    const givenA = (a.fullName ?? '').toLowerCase().split(/\s+/)[0] ?? '';
    const givenB = (b.fullName ?? '').toLowerCase().split(/\s+/)[0] ?? '';
    if (givenA && givenB && levenshtein(givenA, givenB) <= 1) {
      total += 15;
      reasons.push('surname + given-name fuzzy match');
    }
  }

  // Negative: shared inbox but phones from disjoint countries — common when
  // spouses / coworkers share one email while being distinct people. Don't
  // auto-merge these.
  if (hasSharedEmail && !hasSharedPhone && a.phonesE164.length > 0 && b.phonesE164.length > 0) {
    const countriesA = phoneCountriesOf(a);
    const countriesB = phoneCountriesOf(b);
    const anyShared = [...countriesA].some((iso) => countriesB.has(iso));
    if (!anyShared && countriesA.size > 0 && countriesB.size > 0) {
      total -= 15;
      reasons.push('phone country mismatch (negative)');
    }
  }

  // Negative: same name with zero contact overlap — the "John Smith"
  // problem. Push the score under the auto-merge threshold.
  if (sameName && !hasSharedEmail && !hasSharedPhone) {
    total -= 20;
    reasons.push('name match but no shared contact (negative)');
  }

  return {
    candidate: b,
    score: clamp(total, 0, 100),
    reasons,
    confidence: 'low', // real tier assigned by the caller after threshold lookup
  };
}
// ─── Helpers ────────────────────────────────────────────────────────────────
/** Append `value` to the bucket stored at `key`, creating the bucket on first use. */
function pushTo<K, V>(map: Map<K, V[]>, key: K, value: V): void {
  const bucket = map.get(key) ?? [];
  bucket.push(value);
  if (bucket.length === 1) map.set(key, bucket);
}
/** Map a numeric score onto its confidence tier; both bounds are inclusive. */
function classify(score: number, thresholds: DedupThresholds): MatchConfidence {
  return score >= thresholds.highScore
    ? 'high'
    : score >= thresholds.mediumScore
      ? 'medium'
      : 'low';
}
/** Constrain `value` to the inclusive range [min, max]. */
function clamp(value: number, min: number, max: number): number {
  return Math.min(max, Math.max(min, value));
}
/** Count ASCII decimal digits (0-9) in `s`; other characters are ignored. */
function countDigits(s: string): number {
  return (s.match(/[0-9]/g) ?? []).length;
}
/**
 * Resolve each of a candidate's E.164 phones to its ISO country code (via
 * libphonenumber-js). Parsed fresh per call — the surrounding callers don't
 * batch, so the parse cost is accepted.
 */
function phoneCountriesOf(c: MatchCandidate): Set<string> {
  const isoCodes = c.phonesE164
    .map((phone) => parsePhone(phone).country)
    .filter((iso): iso is string => !!iso);
  return new Set(isoCodes);
}

View File

@@ -0,0 +1,274 @@
/**
* Migration report writer — turns a `MigrationPlan` (from
* `migration-transform.ts`) into a CSV + a human-readable Markdown
* summary on disk under `.migration/<timestamp>/`.
*
* The CSV format is intentionally machine-friendly (one row per
* planned operation) so it can be diffed across runs and inspected
* by hand. The summary is designed for "open this in your editor and
* eyeball it for 5 minutes before --apply."
*/
import { promises as fs } from 'node:fs';
import path from 'node:path';
import type { MigrationPlan } from './migration-transform';
// ─── Output directory ───────────────────────────────────────────────────────
/** Locations of the artefacts one dry-run emits. Note: `rootDir` here is the
 * timestamped report directory, NOT the worktree root passed to
 * `resolveReportPaths`. */
export interface ReportPaths {
  /** Timestamped directory (`<worktree>/.migration/<timestamp>`) that holds
   * all artefacts below. */
  rootDir: string;
  /** Machine-diffable CSV, one row per planned operation. */
  csvPath: string;
  /** Human-readable Markdown summary for pre-apply review. */
  summaryPath: string;
  /** The full `MigrationPlan`, pretty-printed JSON. */
  planJsonPath: string;
}
/** Resolve report paths relative to the worktree root. The timestamped
 * directory itself is only created later, by `writeReport`. */
export function resolveReportPaths(
  rootDir: string,
  timestamp: string = new Date().toISOString().replace(/[:.]/g, '-'),
): ReportPaths {
  const reportDir = path.join(rootDir, '.migration', timestamp);
  const inReportDir = (file: string): string => path.join(reportDir, file);
  return {
    rootDir: reportDir,
    csvPath: inReportDir('report.csv'),
    summaryPath: inReportDir('summary.md'),
    planJsonPath: inReportDir('plan.json'),
  };
}
// ─── CSV row shape ──────────────────────────────────────────────────────────
/** One planned operation, flattened for CSV output. */
interface CsvRow {
  // create_client / create_contact / create_address / create_interest /
  // auto_link / flag / needs_review
  op: string;
  reason: string;
  source_id: string;
  target_table: string;
  target_value: string;
  confidence: string;
  manual_review: 'true' | 'false';
}
// Minimal RFC 4180 escaping: wrap the cell in quotes when it contains a
// comma, quote, or line break, doubling any embedded quotes. No dependency
// needed for this.
function csvEscape(s: string): string {
  const needsQuoting = /[",\n\r]/.test(s);
  if (!needsQuoting) return s;
  return `"${s.split('"').join('""')}"`;
}
/** Serialize one CsvRow as a comma-joined, escaped CSV line (no trailing newline). */
function rowToCsvLine(r: CsvRow): string {
  const columnOrder = [
    'op',
    'reason',
    'source_id',
    'target_table',
    'target_value',
    'confidence',
    'manual_review',
  ] as const;
  return columnOrder.map((column) => csvEscape(r[column])).join(',');
}
// ─── Build CSV ──────────────────────────────────────────────────────────────
/**
 * Render the full plan as CSV: a header row followed by one line per
 * planned operation, in section order (clients + their contacts/addresses,
 * then interests, auto-links, review pairs, quality flags).
 */
export function buildCsv(plan: MigrationPlan): string {
  const lines: string[] = [];
  lines.push(
    [
      'op',
      'reason',
      'source_id',
      'target_table',
      'target_value',
      'confidence',
      'manual_review',
    ].join(','),
  );
  // One create_client row per planned client, immediately followed by its
  // contact and address rows. source_id is the pipe-joined cluster membership.
  for (const client of plan.clients) {
    lines.push(
      rowToCsvLine({
        op: 'create_client',
        reason: client.sourceIds.length > 1 ? 'auto-merged cluster' : 'new',
        source_id: client.sourceIds.join('|'),
        target_table: 'clients.fullName',
        target_value: client.fullName,
        confidence: 'N/A',
        manual_review: 'false',
      }),
    );
    for (const c of client.contacts) {
      lines.push(
        rowToCsvLine({
          op: 'create_contact',
          // Contacts flagged during normalization carry the flag text as
          // the reason and are marked for manual review.
          reason: c.flagged ?? 'new',
          source_id: client.sourceIds.join('|'),
          target_table: `clientContacts.${c.channel}`,
          target_value: c.value,
          confidence: 'N/A',
          manual_review: c.flagged ? 'true' : 'false',
        }),
      );
    }
    for (const a of client.addresses) {
      lines.push(
        rowToCsvLine({
          op: 'create_address',
          reason: 'address text present',
          source_id: client.sourceIds.join('|'),
          target_table: 'clientAddresses.countryIso',
          target_value: a.countryIso ?? '(unresolved)',
          confidence: a.countryConfidence ?? 'fallback',
          // Fallback-confidence or unresolved countries need a human look.
          manual_review: a.countryConfidence === 'fallback' || !a.countryIso ? 'true' : 'false',
        }),
      );
    }
  }
  for (const interest of plan.interests) {
    lines.push(
      rowToCsvLine({
        op: 'create_interest',
        reason: `pipelineStage=${interest.pipelineStage}`,
        source_id: String(interest.sourceId),
        target_table: 'interests',
        target_value: `${interest.berthMooringNumber ?? '(no berth)'} / ${interest.yachtName ?? '(no yacht)'}`,
        confidence: 'N/A',
        manual_review: 'false',
      }),
    );
  }
  for (const link of plan.autoLinks) {
    lines.push(
      rowToCsvLine({
        op: 'auto_link',
        reason: link.reasons.join(' + '),
        source_id: `${link.leadSourceId}<-${link.mergedSourceIds.join(',')}`,
        target_table: 'clients',
        target_value: '(merged into lead)',
        confidence: `score=${link.score}`,
        manual_review: 'false',
      }),
    );
  }
  for (const pair of plan.needsReview) {
    lines.push(
      rowToCsvLine({
        op: 'needs_review',
        reason: pair.reasons.join(' + '),
        source_id: `${pair.aSourceId}<->${pair.bSourceId}`,
        target_table: 'clients',
        target_value: '(human review required)',
        confidence: `score=${pair.score}`,
        manual_review: 'true',
      }),
    );
  }
  for (const flag of plan.flags) {
    lines.push(
      rowToCsvLine({
        op: 'flag',
        reason: flag.reason,
        source_id: String(flag.sourceId),
        target_table: flag.sourceTable,
        target_value: JSON.stringify(flag.details ?? {}),
        confidence: 'N/A',
        manual_review: 'true',
      }),
    );
  }
  // Trailing newline so the file ends cleanly for diff tools.
  return lines.join('\n') + '\n';
}
// ─── Build summary markdown ─────────────────────────────────────────────────
/**
 * Render the human-readable Markdown summary for one dry-run.
 *
 * @param plan        Transform output being summarized.
 * @param generatedAt Timestamp string embedded in the H1 heading.
 * @returns Markdown document, ending in a trailing newline.
 */
export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
  const s = plan.stats;
  const lines: string[] = [];
  lines.push(`# Migration Dry-Run — ${generatedAt}`);
  lines.push('');
  lines.push('## Input');
  lines.push(`- ${s.inputInterestRows} NocoDB Interests`);
  lines.push(`- ${s.inputResidentialRows} NocoDB Residential Interests`);
  lines.push('');
  lines.push('## Outcome');
  lines.push(`- ${s.outputClients} clients`);
  lines.push(`- ${s.outputInterests} interests (one per source row, linked to deduped client)`);
  lines.push(`- ${s.outputContacts} client_contacts`);
  lines.push(`- ${s.outputAddresses} client_addresses`);
  lines.push('');
  lines.push('## Auto-linked clusters');
  if (plan.autoLinks.length === 0) {
    lines.push('_None — every input row maps to a unique client._');
  } else {
    for (const link of plan.autoLinks) {
      const merged = link.mergedSourceIds.length;
      lines.push(
        `- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) — score ${link.score} via ${link.reasons.join(' + ')}`,
      );
    }
  }
  lines.push('');
  lines.push('## Pairs flagged for human review');
  if (plan.needsReview.length === 0) {
    lines.push('_None._');
  } else {
    for (const pair of plan.needsReview) {
      // FIX: the two row ids previously rendered fused together (the
      // separator glyph was lost — `` `1``2` ``). Use ↔ to mirror the
      // `a<->b` notation in report.csv.
      lines.push(
        `- Rows \`${pair.aSourceId}\` ↔ \`${pair.bSourceId}\` — score ${pair.score} (${pair.reasons.join(' + ')})`,
      );
    }
  }
  lines.push('');
  lines.push('## Data quality flags');
  if (plan.flags.length === 0) {
    lines.push('_No quality issues._');
  } else {
    // Aggregate counts per reason, most frequent first.
    const byReason = new Map<string, number>();
    for (const f of plan.flags) {
      byReason.set(f.reason, (byReason.get(f.reason) ?? 0) + 1);
    }
    for (const [reason, count] of [...byReason].sort((a, b) => b[1] - a[1])) {
      lines.push(`- **${count}× ${reason}**`);
    }
    lines.push('');
    lines.push('### Detail');
    // Only the first 30 in the summary; the CSV carries the full list.
    for (const f of plan.flags.slice(0, 30)) {
      // FIX: the reason and JSON details previously rendered fused together
      // (lost separator) — restore the em-dash the file uses elsewhere.
      lines.push(
        `- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` — \`${JSON.stringify(f.details)}\`` : ''}`,
      );
    }
    if (plan.flags.length > 30) {
      lines.push(`- _… and ${plan.flags.length - 30} more (see report.csv for full list)_`);
    }
  }
  lines.push('');
  lines.push('## Next step');
  lines.push('');
  lines.push('Eyeball the auto-linked + flagged-for-review pairs above.');
  lines.push('When satisfied, re-run the script with `--apply --report .migration/<this-dir>/`.');
  lines.push('Apply will refuse to run if the source NocoDB has changed since this dry-run.');
  return lines.join('\n') + '\n';
}
// ─── Write to disk ──────────────────────────────────────────────────────────
/**
 * Write all three report artefacts (CSV, Markdown summary, plan JSON) to
 * disk, creating the timestamped report directory first. Writes run
 * sequentially, in the same order the paths are declared.
 */
export async function writeReport(
  paths: ReportPaths,
  plan: MigrationPlan,
  generatedAt: string,
): Promise<void> {
  const { rootDir, csvPath, summaryPath, planJsonPath } = paths;
  await fs.mkdir(rootDir, { recursive: true });
  await fs.writeFile(csvPath, buildCsv(plan), 'utf-8');
  await fs.writeFile(summaryPath, buildSummary(plan, generatedAt), 'utf-8');
  await fs.writeFile(planJsonPath, JSON.stringify(plan, null, 2), 'utf-8');
}

View File

@@ -0,0 +1,576 @@
/**
* Pure transform: NocoDB snapshot → planned new-system entities + dedup result.
*
* Used by the migration script's `--dry-run` (to produce the report) and
* `--apply` (to actually write). Keeping this pure means the same code
* runs in both modes, in tests against the frozen fixture, and in the
* one-off CLI run against the live base.
*
* No side effects, no DB calls, no external services.
*/
import {
normalizeName,
normalizeEmail,
normalizePhone,
resolveCountry,
type NormalizedPhone,
} from './normalize';
import { findClientMatches, type MatchCandidate } from './find-matches';
import type { CountryCode } from '@/lib/i18n/countries';
import type { NocoDbRow, NocoDbSnapshot } from './nocodb-source';
// ─── Plan output ────────────────────────────────────────────────────────────
export interface PlannedClient {
  /** Stable id derived from the deduped cluster's lead row. Used by the
   * apply phase to reference newly-created clients before they exist
   * in the DB. */
  tempId: string;
  /** Source row IDs that contributed to this client (one if no duplicates,
   * many if dedup merged a cluster). */
  sourceIds: number[];
  fullName: string;
  surnameToken?: string;
  countryIso: CountryCode | null;
  preferredContactMethod: string | null;
  source: string | null;
  contacts: PlannedContact[];
  addresses: PlannedAddress[];
}
/** One row destined for `client_contacts`. */
export interface PlannedContact {
  channel: 'email' | 'phone' | 'whatsapp' | 'other';
  value: string;
  valueE164?: string | null;
  valueCountry?: CountryCode | null;
  isPrimary: boolean;
  /** Set when normalization flagged the value; the report marks flagged
   * contacts for manual review. */
  flagged?: string;
}
/** One row destined for `client_addresses`. */
export interface PlannedAddress {
  streetAddress: string | null;
  city: string | null;
  countryIso: CountryCode | null;
  /** When confidence is low, the migration script flags the row for
   * human review. */
  countryConfidence: 'exact' | 'fuzzy' | 'city' | 'fallback' | null;
}
export interface PlannedInterest {
  /** NocoDB row id this interest came from. */
  sourceId: number;
  /** tempId of the planned client this interest hangs off. */
  clientTempId: string;
  pipelineStage: string;
  leadCategory: string | null;
  source: string | null;
  notes: string | null;
  /** Mooring number; the apply phase resolves this to a berthId via the
   * new-system Berths table. */
  berthMooringNumber: string | null;
  yachtName: string | null;
  /** Date stamps for milestone columns. ISO strings if parseable. */
  dateEoiSent: string | null;
  dateEoiSigned: string | null;
  dateDepositReceived: string | null;
  dateContractSent: string | null;
  dateContractSigned: string | null;
  dateLastContact: string | null;
  /** Documenso linkage carried forward when present so the document
   * record can be stitched up downstream. */
  documensoId: string | null;
}
/** A data-quality problem found while normalizing one source row. */
export interface MigrationFlag {
  sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions';
  sourceId: number;
  reason: string;
  details?: Record<string, unknown>;
}
export interface MigrationPlan {
  clients: PlannedClient[];
  interests: PlannedInterest[];
  flags: MigrationFlag[];
  /** Pairs that the migration would auto-link (high score). */
  autoLinks: Array<{
    leadSourceId: number;
    mergedSourceIds: number[];
    score: number;
    reasons: string[];
  }>;
  /** Pairs that need human review (medium score). Each pair shows up
   * in the migration report; the user resolves before --apply. */
  needsReview: Array<{ aSourceId: number; bSourceId: number; score: number; reasons: string[] }>;
  stats: MigrationStats;
}
/** Aggregate counts surfaced in the report's Input/Outcome sections. */
export interface MigrationStats {
  inputInterestRows: number;
  inputResidentialRows: number;
  outputClients: number;
  outputInterests: number;
  outputContacts: number;
  outputAddresses: number;
  flaggedRows: number;
  autoLinkedClusters: number;
  needsReviewPairs: number;
}
export interface TransformOptions {
  /** ISO country used when a phone has no prefix and the row has no
   * Place of Residence. Defaults to AI (Anguilla / Port Nimara's home). */
  defaultPhoneCountry: CountryCode;
  /** Score thresholds for auto-link vs human review. Should match the
   * per-port `system_settings` values once the runtime UI is in place. */
  thresholds: {
    autoLink: number;
    needsReview: number;
  };
}
/** Baseline options; callers override individual fields via the `options`
 * argument of `transformSnapshot`. */
const DEFAULT_OPTIONS: TransformOptions = {
  defaultPhoneCountry: 'AI',
  thresholds: { autoLink: 90, needsReview: 50 },
};
// ─── Stage mapping ──────────────────────────────────────────────────────────
/** Legacy NocoDB "Sales Process Level" label → new pipeline-stage slug.
 * Note both negotiation labels intentionally collapse onto 'contract_sent'. */
const STAGE_MAP: Record<string, string> = {
  'General Qualified Interest': 'open',
  'Specific Qualified Interest': 'details_sent',
  'EOI and NDA Sent': 'eoi_sent',
  'Signed EOI and NDA': 'eoi_signed',
  'Made Reservation': 'deposit_10pct',
  'Contract Negotiation': 'contract_sent',
  'Contract Negotiations Finalized': 'contract_sent',
  'Contract Signed': 'contract_signed',
};
/** Legacy "Lead Category" label → new lead-category slug. */
const LEAD_CATEGORY_MAP: Record<string, string> = {
  General: 'general_interest',
  'Friends and Family': 'general_interest',
};
/** Legacy "Source" label → new source slug. Labels missing here fall back
 * to 'manual' at the lookup site. */
const SOURCE_MAP: Record<string, string> = {
  portal: 'website',
  Form: 'website',
  External: 'manual',
};
// ─── Date parsing ───────────────────────────────────────────────────────────
/**
 * Best-effort parser for the date formats the legacy NocoDB used:
 * ISO / YYYY-MM-DD, DD-MM-YYYY, DD/MM/YYYY, plus whatever the `Date`
 * constructor accepts as a last resort. Returns an ISO timestamp string,
 * or null when the input is absent or unparseable.
 */
function parseFlexibleDate(input: unknown): string | null {
  if (typeof input !== 'string') return null;
  const trimmed = input.trim();
  if (trimmed === '') return null;

  const toIsoOrNull = (d: Date): string | null =>
    Number.isNaN(d.getTime()) ? null : d.toISOString();

  // ISO-ish (starts with YYYY-MM-DD) — hand straight to Date.
  if (/^\d{4}-\d{2}-\d{2}/.test(trimmed)) {
    return toIsoOrNull(new Date(trimmed));
  }

  // Day-first European forms: DD-MM-YYYY / DD/MM/YYYY.
  const dayFirst = trimmed.match(/^(\d{1,2})[-/](\d{1,2})[-/](\d{4})$/);
  if (dayFirst) {
    const [, dd, mm, yyyy] = dayFirst;
    return toIsoOrNull(new Date(`${yyyy}-${mm!.padStart(2, '0')}-${dd!.padStart(2, '0')}`));
  }

  // Anything else: let the Date constructor take a swing.
  return toIsoOrNull(new Date(trimmed));
}
// ─── Main transform ─────────────────────────────────────────────────────────
/**
 * Run the full transform pipeline against a NocoDB snapshot. Pure —
 * identical input always yields an identical plan.
 */
export function transformSnapshot(
  snapshot: NocoDbSnapshot,
  options: Partial<TransformOptions> = {},
): MigrationPlan {
  const opts: TransformOptions = { ...DEFAULT_OPTIONS, ...options };
  const flags: MigrationFlag[] = [];

  // Normalize each source row into a scoring candidate, then cluster so
  // duplicate rows collapse onto one planned client. tempIds are assigned
  // after clustering because a cluster of rows shares a single tempId.
  const candidates = snapshot.interests.map((row) =>
    rowToCandidate(row, 'interests', opts, flags),
  );
  const clusters = clusterByDedup(candidates, opts);

  const clients: PlannedClient[] = [];
  const interests: PlannedInterest[] = [];
  const autoLinks: MigrationPlan['autoLinks'] = [];
  const needsReview: MigrationPlan['needsReview'] = [];

  for (const cluster of clusters) {
    const lead = cluster.leadCandidate;
    const tempId = `client-${lead.row.Id}`;

    // One client per cluster (built from the lead row, enriched with the
    // other members' contact/address data) …
    clients.push(buildPlannedClient(tempId, cluster, opts));

    // … but every member row still becomes its own interest record.
    for (const member of cluster.members) {
      interests.push(buildPlannedInterest(member.row, tempId));
    }

    if (cluster.members.length > 1) {
      autoLinks.push({
        leadSourceId: lead.row.Id,
        mergedSourceIds: cluster.members.filter((m) => m !== lead).map((m) => m.row.Id),
        score: cluster.maxScore,
        reasons: cluster.reasons,
      });
    }
    needsReview.push(...cluster.reviewPairs);
  }

  return {
    clients,
    interests,
    flags,
    autoLinks,
    needsReview,
    stats: {
      inputInterestRows: snapshot.interests.length,
      inputResidentialRows: snapshot.residentialInterests.length,
      outputClients: clients.length,
      outputInterests: interests.length,
      outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0),
      outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0),
      flaggedRows: flags.length,
      autoLinkedClusters: autoLinks.length,
      needsReviewPairs: needsReview.length,
    },
  };
}
// ─── Helpers ────────────────────────────────────────────────────────────────
/** Intermediate per-row result: the raw NocoDB row plus everything the
 * dedup pass and the plan builders need from it. */
interface RowCandidate {
  row: NocoDbRow;
  candidate: MatchCandidate;
  /** Phone normalize result for the row's primary phone; used downstream
   * to attach valueE164 + country to the planned contact. */
  phoneResult: NormalizedPhone | null;
  /** Country resolved from "Place of Residence". */
  countryIso: CountryCode | null;
  countryConfidence: 'exact' | 'fuzzy' | 'city' | null;
  /** Normalized email or null. */
  email: string | null;
  /** Display name from `normalizeName`. */
  displayName: string;
}
/**
 * Normalize one raw NocoDB row into a RowCandidate: cleaned name, email,
 * E.164 phone, and resolved country. Every field present in the source but
 * failing normalization pushes a MigrationFlag so the report can show it.
 */
function rowToCandidate(
  row: NocoDbRow,
  sourceTable: MigrationFlag['sourceTable'],
  opts: TransformOptions,
  flags: MigrationFlag[],
): RowCandidate {
  const cell = (key: string): string => (row[key] as string | undefined) ?? '';
  const rawName = cell('Full Name');
  const rawEmail = cell('Email Address');
  const rawPhone = cell('Phone Number');
  const rawCountry = cell('Place of Residence');

  const normName = normalizeName(rawName);
  const email = normalizeEmail(rawEmail);
  const country = resolveCountry(rawCountry);
  // Phones without an international prefix are parsed in the row's resolved
  // country, falling back to the configured default.
  const phoneCountry = (country.iso ?? opts.defaultPhoneCountry) as CountryCode;
  const phoneResult = normalizePhone(rawPhone, phoneCountry);

  const flag = (reason: string, details: Record<string, unknown>): void => {
    flags.push({ sourceTable, sourceId: row.Id, reason, details });
  };
  // Surface anything weird so the report can show it (same order the
  // fields were read: phone, email, country).
  if (rawPhone && !phoneResult?.e164) {
    flag(phoneResult?.flagged ? `phone ${phoneResult.flagged}` : 'phone unparseable', { rawPhone });
  }
  if (rawEmail && !email) {
    flag('email invalid', { rawEmail });
  }
  if (rawCountry && !country.iso) {
    flag('country unresolved', { rawCountry });
  }

  return {
    row,
    candidate: {
      id: String(row.Id),
      fullName: normName.display || null,
      surnameToken: normName.surnameToken ?? null,
      emails: email ? [email] : [],
      phonesE164: phoneResult?.e164 ? [phoneResult.e164] : [],
      countryIso: country.iso ?? null,
    },
    phoneResult,
    countryIso: country.iso ?? null,
    countryConfidence: country.confidence,
    email,
    displayName: normName.display,
  };
}
/** One dedup cluster: the set of source rows that collapse into a single
 * planned client. */
interface Cluster {
  /** The cluster's "lead" row (most complete + most recent). */
  leadCandidate: RowCandidate;
  /** Every row in the cluster, lead included. */
  members: RowCandidate[];
  /** Highest auto-link pair score recorded for the cluster (0 for
   * singletons). */
  maxScore: number;
  /** De-duplicated union of match reasons across the cluster's pairs. */
  reasons: string[];
  /** Pairs in this cluster that scored medium (need review). */
  reviewPairs: Array<{ aSourceId: number; bSourceId: number; score: number; reasons: string[] }>;
}
/**
 * Cluster row candidates using a union-find over high-scoring pairs.
 *
 * Every pair scoring >= `thresholds.autoLink` is unioned into one cluster;
 * pairs in [needsReview, autoLink) are collected for human triage but not
 * merged. Per-cluster metadata (max score, reasons, review pairs) is keyed
 * by the cluster root, so `union` must migrate that metadata whenever a
 * root is absorbed into another — previously, entries recorded under a
 * root that later lost a union were silently dropped from the final plan.
 */
function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] {
  const parent = new Map<string, string>();
  for (const r of rows) parent.set(r.candidate.id, r.candidate.id);

  // Per-cluster bookkeeping, keyed by the cluster's CURRENT root id.
  const clusterReasons = new Map<string, string[]>();
  const clusterMaxScore = new Map<string, number>();
  const clusterReviewPairs = new Map<string, Cluster['reviewPairs']>();

  const find = (id: string): string => {
    let cur = id;
    while (parent.get(cur) !== cur) {
      const next = parent.get(cur)!;
      parent.set(cur, parent.get(next)!); // path compression (halving)
      cur = parent.get(cur)!;
    }
    return cur;
  };

  const union = (a: string, b: string) => {
    const rootA = find(a);
    const rootB = find(b);
    if (rootA === rootB) return;
    parent.set(rootA, rootB);
    // BUG FIX: migrate metadata stored under the absorbed root to the
    // surviving root; the final grouping below looks everything up by the
    // surviving root only.
    const absorbedScore = clusterMaxScore.get(rootA);
    if (absorbedScore !== undefined) {
      clusterMaxScore.set(rootB, Math.max(clusterMaxScore.get(rootB) ?? 0, absorbedScore));
      clusterMaxScore.delete(rootA);
    }
    const absorbedReasons = clusterReasons.get(rootA);
    if (absorbedReasons) {
      const merged = clusterReasons.get(rootB) ?? [];
      for (const reason of absorbedReasons) {
        if (!merged.includes(reason)) merged.push(reason);
      }
      clusterReasons.set(rootB, merged);
      clusterReasons.delete(rootA);
    }
    const absorbedPairs = clusterReviewPairs.get(rootA);
    if (absorbedPairs) {
      clusterReviewPairs.set(rootB, [...(clusterReviewPairs.get(rootB) ?? []), ...absorbedPairs]);
      clusterReviewPairs.delete(rootA);
    }
  };

  // Score each candidate against the rows after it, so every unordered pair
  // is considered exactly once. find-matches does its own blocking, so this
  // stays cheap even for the full dataset.
  for (let i = 0; i < rows.length; i += 1) {
    const left = rows[i]!;
    const remainingPool = rows.slice(i + 1).map((r) => r.candidate);
    if (remainingPool.length === 0) continue;
    const matches = findClientMatches(left.candidate, remainingPool, {
      highScore: opts.thresholds.autoLink,
      mediumScore: opts.thresholds.needsReview,
    });
    for (const m of matches) {
      if (m.score >= opts.thresholds.autoLink) {
        union(left.candidate.id, m.candidate.id);
        const root = find(left.candidate.id);
        clusterMaxScore.set(root, Math.max(clusterMaxScore.get(root) ?? 0, m.score));
        const existing = clusterReasons.get(root) ?? [];
        for (const reason of m.reasons) {
          if (!existing.includes(reason)) existing.push(reason);
        }
        clusterReasons.set(root, existing);
      } else if (m.score >= opts.thresholds.needsReview) {
        // Medium — track on whichever cluster `left` currently belongs to.
        const root = find(left.candidate.id);
        const list = clusterReviewPairs.get(root) ?? [];
        list.push({
          aSourceId: parseInt(left.candidate.id, 10),
          bSourceId: parseInt(m.candidate.id, 10),
          score: m.score,
          reasons: m.reasons,
        });
        clusterReviewPairs.set(root, list);
      }
    }
  }

  // Group rows by their final cluster root.
  const byRoot = new Map<string, RowCandidate[]>();
  for (const r of rows) {
    const root = find(r.candidate.id);
    const list = byRoot.get(root) ?? [];
    list.push(r);
    byRoot.set(root, list);
  }

  // Materialize cluster objects, choosing the most-complete row as lead.
  const clusters: Cluster[] = [];
  for (const [root, members] of byRoot) {
    clusters.push({
      leadCandidate: pickLead(members),
      members,
      maxScore: clusterMaxScore.get(root) ?? 0,
      reasons: clusterReasons.get(root) ?? [],
      reviewPairs: clusterReviewPairs.get(root) ?? [],
    });
  }
  return clusters;
}
/** Choose the cluster's lead row: most populated fields wins, ties broken
 * by the highest NocoDB Id (ids are monotonic, so highest == most recent). */
function pickLead(rows: RowCandidate[]): RowCandidate {
  let best = rows[0]!;
  let bestScore = completenessScore(best);
  for (const contender of rows.slice(1)) {
    const contenderScore = completenessScore(contender);
    const wins =
      contenderScore > bestScore ||
      (contenderScore === bestScore && contender.row.Id > best.row.Id);
    if (wins) {
      best = contender;
      bestScore = contenderScore;
    }
  }
  return best;
}
/** Field-population heuristic used to rank cluster members: contact info
 * weighs most, free-text columns contribute fractional credit. */
function completenessScore(r: RowCandidate): number {
  const weightedFields: Array<[unknown, number]> = [
    [r.email, 1],
    [r.phoneResult?.e164, 1],
    [r.row['Address'], 0.5],
    [r.row['Yacht Name'], 0.5],
    [r.row['Source'], 0.25],
    [r.row['Lead Category'], 0.25],
    [r.row['Internal Notes'], 0.25],
  ];
  return weightedFields.reduce((sum, [value, weight]) => (value ? sum + weight : sum), 0);
}
/**
 * Build one PlannedClient from a dedup cluster. Identity fields (name,
 * country, source, preferred contact method, address) come from the lead
 * row; contact info is unioned across ALL cluster members.
 */
function buildPlannedClient(
  tempId: string,
  cluster: Cluster,
  opts: TransformOptions,
): PlannedClient {
  const lead = cluster.leadCandidate;
  // Collect distinct emails + phones from across the cluster — duplicate
  // submissions often come with different contact methods we want to
  // preserve as multiple rows in `client_contacts`.
  const seenEmails = new Set<string>();
  const seenPhones = new Set<string>();
  const contacts: PlannedContact[] = [];
  for (const member of cluster.members) {
    if (member.email && !seenEmails.has(member.email)) {
      seenEmails.add(member.email);
      contacts.push({
        channel: 'email',
        value: member.email,
        // The very first contact collected becomes the primary email.
        isPrimary: contacts.length === 0,
      });
    }
    if (member.phoneResult?.e164 && !seenPhones.has(member.phoneResult.e164)) {
      seenPhones.add(member.phoneResult.e164);
      const isFirstPhone = !contacts.some((c) => c.channel === 'phone');
      contacts.push({
        channel: 'phone',
        value: member.phoneResult.e164,
        valueE164: member.phoneResult.e164,
        valueCountry: member.phoneResult.country,
        // NOTE(review): this condition lets the first phone be primary even
        // when an email is already primary — i.e. one primary PER CHANNEL,
        // not one per client. If downstream expects exactly one primary
        // contact, this produces two; confirm the intended invariant.
        isPrimary: isFirstPhone && contacts.every((c) => !c.isPrimary || c.channel === 'email'),
        flagged: member.phoneResult.flagged,
      });
    }
  }
  // NOTE(review): the original comment here promised to "demote the
  // email-primary if a more-completable phone exists", but no demotion is
  // implemented — the first contact simply stays primary.
  const preferredMethod = (lead.row['Contact Method Preferred'] as string | undefined)
    ?.toLowerCase()
    ?.trim();
  // Address: only build if the lead row has a meaningful address text.
  const rawAddress = (lead.row['Address'] as string | undefined)?.trim();
  const addresses: PlannedAddress[] = [];
  if (rawAddress) {
    addresses.push({
      streetAddress: rawAddress,
      city: null,
      // Falls back to the default country (confidence 'fallback') when the
      // row's Place of Residence didn't resolve.
      countryIso: lead.countryIso ?? opts.defaultPhoneCountry,
      countryConfidence: lead.countryConfidence ?? 'fallback',
    });
  }
  const sourceFromRow = (lead.row['Source'] as string | undefined) ?? null;
  // Unknown legacy source labels map to 'manual' rather than passing through.
  const mappedSource = sourceFromRow ? (SOURCE_MAP[sourceFromRow] ?? 'manual') : null;
  return {
    tempId,
    sourceIds: cluster.members.map((m) => m.row.Id),
    fullName: lead.displayName,
    surnameToken: lead.candidate.surnameToken ?? undefined,
    countryIso: lead.countryIso,
    preferredContactMethod: preferredMethod ?? null,
    source: mappedSource,
    contacts,
    addresses,
  };
}
/**
 * Map one legacy NocoDB row to a planned interest attached to the
 * planned client identified by `clientTempId`.
 */
function buildPlannedInterest(row: NocoDbRow, clientTempId: string): PlannedInterest {
  // Placeholder strings sales reps use for "yacht name unknown yet".
  const yachtPlaceholders = ['TBC', 'Na', 'NA', 'na', 'N/A', 'TBD', 'tbd'];
  const rawYacht = (row['Yacht Name'] as string | undefined)?.trim();
  const yachtName = rawYacht && !yachtPlaceholders.includes(rawYacht) ? rawYacht : null;
  // Fold the legacy free-text note fields into one blob, labelling the
  // non-obvious ones so provenance survives the migration.
  const noteChunks: string[] = [];
  const internal = (row['Internal Notes'] as string | undefined)?.trim();
  if (internal) noteChunks.push(internal);
  const extra = (row['Extra Comments'] as string | undefined)?.trim();
  if (extra) noteChunks.push(`Extra Comments: ${extra}`);
  const berthSize = (row['Berth Size Desired'] as string | undefined)?.trim();
  if (berthSize) noteChunks.push(`Berth size desired: ${berthSize}`);
  const stageKey = (row['Sales Process Level'] as string | undefined) ?? '';
  const categoryKey = (row['Lead Category'] as string | undefined) ?? '';
  return {
    sourceId: row.Id,
    clientTempId,
    pipelineStage: STAGE_MAP[stageKey] ?? 'open',
    leadCategory: LEAD_CATEGORY_MAP[categoryKey] ?? null,
    // Empty-string sources collapse to null.
    source: (row['Source'] as string | undefined) || null,
    notes: noteChunks.join('\n\n') || null,
    berthMooringNumber: (row['Berth Number'] as string | undefined) ?? null,
    yachtName,
    dateEoiSent: parseFlexibleDate(row['EOI Time Sent']),
    dateEoiSigned: parseFlexibleDate(row['all_signed_notified_at'] ?? row['developerSignTime']),
    dateDepositReceived: null, // not directly tracked in legacy schema
    dateContractSent: parseFlexibleDate(row['Time LOI Sent']),
    dateContractSigned: parseFlexibleDate(row['developerSignTime']),
    dateLastContact: parseFlexibleDate(row['Created At'] ?? row['Date Added']),
    documensoId: (row['documensoID'] as string | undefined) ?? null,
  };
}

View File

@@ -0,0 +1,152 @@
/**
* Read-only adapter for the legacy NocoDB Port Nimara base.
*
* Used by the one-shot migration script (`scripts/migrate-from-nocodb.ts`)
* to pull every Interest, Residential Interest, and Website Submission
* row from the source-of-truth NocoDB tables. No mutations.
*
* Auth: `xc-token` header per NocoDB v2 API.
*
* The shape returned is a verbatim record of the row's fields — caller
* is responsible for mapping to the new schema via `nocodb-transform.ts`.
*/
import { z } from 'zod';
// ─── Configuration ──────────────────────────────────────────────────────────
const ConfigSchema = z.object({
  url: z.string().url(),
  token: z.string().min(1),
});
export interface NocoDbConfig {
  url: string;
  token: string;
}
/**
 * Read and validate NocoDB connection settings from the environment.
 * Throws (zod) when NOCODB_URL is missing/malformed or NOCODB_TOKEN is
 * absent — fail fast rather than 401 mid-migration.
 */
export function loadNocoDbConfig(env: NodeJS.ProcessEnv = process.env): NocoDbConfig {
  const candidate = { url: env.NOCODB_URL, token: env.NOCODB_TOKEN };
  return ConfigSchema.parse(candidate);
}
// ─── Table identifiers ──────────────────────────────────────────────────────
//
// These IDs are stable per the NocoDB base — they were captured during the
// 2026-05-03 audit and won't change unless the base is rebuilt. If the
// base is reset, regenerate them from `getTablesList`.
export const NOCO_TABLES = {
  interests: 'mbs9hjauug4eseo',
  residentialInterests: 'mscfpwwwjuds4nt',
  websiteInterestSubmissions: 'mevkpcih67c6jsm',
  websiteContactFormSubmissions: 'mxk5cd0pmwnwlcl',
  websiteBerthEoiSupplements: 'mglmioo0ku8zgqj',
  berths: 'mczgos9hr3oa9qc',
} as const;
// ─── HTTP shape ─────────────────────────────────────────────────────────────
/** Envelope NocoDB's v2 list endpoint wraps every page of records in. */
interface NocoDbListResponse<T> {
  list: T[];
  pageInfo: {
    totalRows: number;
    page: number;
    pageSize: number;
    isFirstPage: boolean;
    // `fetchAllRows` stops paginating when this turns true.
    isLastPage: boolean;
  };
}
/** A row's `Id` is always present. The rest of the fields vary per table. */
export type NocoDbRow = Record<string, unknown> & { Id: number };
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Fetch every row of a NocoDB table, auto-paginating until the API
 * reports `isLastPage` (or a page comes back empty). The legacy base is
 * small (252 Interests rows at most) so plain accumulation beats
 * streaming here.
 *
 * @throws Error when any page responds non-2xx.
 */
export async function fetchAllRows(
  tableId: string,
  config: NocoDbConfig,
  pageSize = 250,
): Promise<NocoDbRow[]> {
  // Hard cap to prevent infinite-loop bugs if pageInfo lies: 200 pages
  // * 250 rows = 50k rows maximum from one table. NOTE(review): hitting
  // the cap returns truncated data silently — confirm that's acceptable.
  const MAX_PAGES = 200;
  const rows: NocoDbRow[] = [];
  for (let page = 1; page <= MAX_PAGES; page += 1) {
    const endpoint = new URL(`${config.url}/api/v2/tables/${tableId}/records`);
    endpoint.searchParams.set('limit', String(pageSize));
    endpoint.searchParams.set('offset', String((page - 1) * pageSize));
    const res = await fetch(endpoint, {
      headers: {
        'xc-token': config.token,
        accept: 'application/json',
      },
    });
    if (!res.ok) {
      throw new Error(
        `NocoDB fetch failed: ${res.status} ${res.statusText} — table ${tableId} page ${page}`,
      );
    }
    const body = (await res.json()) as NocoDbListResponse<NocoDbRow>;
    rows.push(...body.list);
    if (body.pageInfo.isLastPage || body.list.length === 0) break;
  }
  return rows;
}
/**
 * Pull every table the migration cares about in one parallel sweep.
 * The returned shape is exactly what the transform layer consumes;
 * `fetchedAt` is stamped after all requests complete.
 */
export interface NocoDbSnapshot {
  interests: NocoDbRow[];
  residentialInterests: NocoDbRow[];
  websiteInterestSubmissions: NocoDbRow[];
  websiteContactFormSubmissions: NocoDbRow[];
  websiteBerthEoiSupplements: NocoDbRow[];
  berths: NocoDbRow[];
  fetchedAt: string;
}
export async function fetchSnapshot(config: NocoDbConfig): Promise<NocoDbSnapshot> {
  // All six requests in flight at once — the base is small enough that
  // parallelism is safe and meaningfully faster than sequential pulls.
  const load = (tableId: string) => fetchAllRows(tableId, config);
  const [
    interests,
    residentialInterests,
    websiteInterestSubmissions,
    websiteContactFormSubmissions,
    websiteBerthEoiSupplements,
    berths,
  ] = await Promise.all([
    load(NOCO_TABLES.interests),
    load(NOCO_TABLES.residentialInterests),
    load(NOCO_TABLES.websiteInterestSubmissions),
    load(NOCO_TABLES.websiteContactFormSubmissions),
    load(NOCO_TABLES.websiteBerthEoiSupplements),
    load(NOCO_TABLES.berths),
  ]);
  return {
    interests,
    residentialInterests,
    websiteInterestSubmissions,
    websiteContactFormSubmissions,
    websiteBerthEoiSupplements,
    berths,
    fetchedAt: new Date().toISOString(),
  };
}

418
src/lib/dedup/normalize.ts Normal file
View File

@@ -0,0 +1,418 @@
/**
* Normalization helpers for the dedup pipeline.
*
* Pure functions (no DB, no React). Used by both the runtime at-create
* surfaces and the one-shot NocoDB migration script. Every transform
* here has a fixture in `tests/unit/dedup/normalize.test.ts` drawn from
* real dirty values observed in the legacy NocoDB Interests table.
*
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §3.
*/
import { z } from 'zod';
import { ALL_COUNTRY_CODES, getCountryName, type CountryCode } from '@/lib/i18n/countries';
import { parsePhoneScriptSafe as parsePhone } from './phone-parse';
// ─── Names ──────────────────────────────────────────────────────────────────
/**
* Tokens that should stay lowercase mid-name. Covers the common Romance,
* Germanic, and Iberian particles seen in client records. The first token
* of a name is always title-cased even if it appears in this set.
*/
const PARTICLES: ReadonlySet<string> = new Set([
  'van', 'von', 'de', 'del', 'da', 'das', 'do', 'dos', 'di', 'le', 'la',
  'el', 'al', 'der', 'den', 'des', 'du', 'dalla', 'della', 'st', 'st.', 'y',
]);
export interface NormalizedName {
  /** Human-readable form for UI display: trimmed, whitespace-collapsed,
   * case-fixed — but slash-with-company structure is kept intact. */
  display: string;
  /** Lowercased form of `display`, used for matching. */
  normalized: string;
  /** Last non-particle token of the person part, lowercased. Blocking
   * key for the dedup algorithm. */
  surnameToken?: string;
}
/**
 * Normalize a free-text full name.
 *
 * Collapses whitespace (including \r \n \t), title-cases tokens while
 * keeping mid-name particles (van / de / dalla / …) lowercase, restores
 * O'Brien-style apostrophe capitalization, and — when the input has a
 * `/` company suffix ("Daniel Wainstein / 7 Knots, LLC") — leaves the
 * company segment verbatim.
 */
export function normalizeName(raw: string | null | undefined): NormalizedName {
  const collapsed = collapseWhitespace((raw ?? '').toString());
  if (!collapsed) {
    return { display: '', normalized: '', surnameToken: undefined };
  }
  const slashIdx = collapsed.indexOf('/');
  const hasCompany = slashIdx !== -1;
  let display: string;
  if (hasCompany) {
    // Title-case only the person part; the company half is typically a
    // brand ("SAS TIKI", "7 Knots, LLC") we must not mangle.
    const personPart = collapsed.slice(0, slashIdx).trim();
    const companyPart = collapsed.slice(slashIdx + 1).trim();
    display = `${titleCaseTokens(personPart)} / ${companyPart}`;
  } else {
    display = titleCaseTokens(collapsed);
  }
  const personSource = hasCompany ? collapsed.slice(0, slashIdx) : collapsed;
  return {
    display,
    normalized: display.toLowerCase(),
    surnameToken: computeSurnameToken(personSource),
  };
}
/** \r \n \t → space, collapse whitespace runs, trim the ends. */
function collapseWhitespace(s: string): string {
  return s
    .replace(/[\r\n\t]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
function titleCaseTokens(s: string): string {
  return s
    .split(' ')
    .filter(Boolean)
    .map((token, idx) => titleCaseOneToken(token, idx === 0))
    .join(' ');
}
function titleCaseOneToken(token: string, isFirst: boolean): string {
  if (!token) return '';
  const lower = token.toLowerCase();
  // Particles stay lowercase mid-name; the first token is always cased.
  if (!isFirst && PARTICLES.has(lower)) return lower;
  // O'Brien / D'Angelo / l'Estrange — capitalize each apostrophe segment.
  if (lower.includes("'")) {
    return lower
      .split("'")
      .map((segment) => capitalizeHead(segment))
      .join("'");
  }
  return capitalizeHead(lower);
}
/** Uppercase the first character, leave the rest untouched. */
function capitalizeHead(s: string): string {
  return s.length > 0 ? s[0]!.toUpperCase() + s.slice(1) : s;
}
function computeSurnameToken(personPart: string): string | undefined {
  const cleaned = collapseWhitespace(personPart);
  if (!cleaned) return undefined;
  const tokens = cleaned.split(' ').map((t) => t.toLowerCase());
  // Scan from the right past particles to the last "real" surname token;
  // if every token is a particle, fall back to the final token verbatim.
  const lastReal = [...tokens].reverse().find((tok) => !PARTICLES.has(tok));
  return lastReal ?? tokens[tokens.length - 1];
}
// ─── Emails ─────────────────────────────────────────────────────────────────
const emailSchema = z.string().email();
/**
 * Trim + lowercase an email; null for empty or invalid input (zod's
 * email rule decides validity) — caller chooses to flag, store, or drop.
 *
 * Plus-aliases (`user+tag@domain.com`) are kept as-is: they are real,
 * distinct mailboxes, and stripping them would auto-merge legitimately
 * separate accounts.
 */
export function normalizeEmail(raw: string | null | undefined): string | null {
  if (raw == null) return null;
  const candidate = String(raw).trim().toLowerCase();
  if (!candidate) return null;
  return emailSchema.safeParse(candidate).success ? candidate : null;
}
// ─── Phones ─────────────────────────────────────────────────────────────────
export type PhoneFlag = 'multi_number' | 'placeholder' | 'unparseable';
export interface NormalizedPhone {
  /** Canonical E.164 form, e.g. '+15742740548'. Null when the input was
   * unparseable or a placeholder fake. */
  e164: string | null;
  /** ISO-3166-1 alpha-2 of the country the number parsed against. */
  country: CountryCode | null;
  /** International display format — handy for migration reports. */
  display: string | null;
  /** Present only when the input had a quirk worth surfacing. */
  flagged?: PhoneFlag;
}
/**
 * Normalize a raw user-entered phone string for comparison + storage.
 * Returns null for empty input.
 *
 * Steps: strip spreadsheet apostrophe prefix → strip \r \n \t → flag
 * multi-number fields and keep the first segment → strip separators →
 * "00" prefix → "+" → flag placeholder fakes (8+ zeros) → parse via
 * libphonenumber-js.
 */
export function normalizePhone(
  raw: string | null | undefined,
  defaultCountry?: CountryCode,
): NormalizedPhone | null {
  if (raw == null) return null;
  let candidate = String(raw).trim();
  if (!candidate) return null;
  // Spreadsheet exports prefix numbers with a literal apostrophe.
  if (candidate.charAt(0) === "'") candidate = candidate.slice(1);
  candidate = candidate.replace(/[\r\n\t]/g, '');
  // A separator means the field holds several numbers — keep the first
  // and flag the row so a human can recover the rest later.
  let flagged: PhoneFlag | undefined;
  if (/[/;,]/.test(candidate)) {
    flagged = 'multi_number';
    candidate = candidate.split(/[/;,]/)[0]!.trim();
  }
  // Drop whitespace, dots, dashes, parens; keep + for the E.164 prefix.
  candidate = candidate.replace(/[\s.\-()]/g, '');
  if (!candidate) {
    return { e164: null, country: null, display: null, flagged: 'unparseable' };
  }
  // International dialling prefix "00" → "+".
  if (candidate.startsWith('00')) candidate = `+${candidate.slice(2)}`;
  // Placeholder fakes: runs of 8+ zeros, e.g. +447000000000.
  if (/0{8,}/.test(candidate)) {
    return { e164: null, country: null, display: null, flagged: 'placeholder' };
  }
  const parsed = parsePhone(candidate, defaultCountry);
  if (!parsed.e164) {
    // No canonical form at all — genuinely garbage input.
    return { e164: null, country: null, display: null, flagged: 'unparseable' };
  }
  // `parsed.isValid` is deliberately ignored: the libphonenumber-js min
  // build under-reports validity (shared +1 territories, truncated
  // metadata). Dedup only needs a canonical E.164 string to compare;
  // strict validity is the form layer's problem.
  return {
    e164: parsed.e164,
    country: parsed.country,
    display: parsed.international,
    flagged,
  };
}
// ─── Countries ──────────────────────────────────────────────────────────────
/**
* Aliases for canonical country names that don't match
* `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
* (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
*
* Kept opinionated and small — only entries we've actually seen in legacy
* data. Adding a new alias is cheap; trying to be exhaustive isn't.
*/
const COUNTRY_ALIASES: Record<string, CountryCode> = {
  // Generic abbreviations
  usa: 'US',
  us: 'US',
  uk: 'GB',
  // Saint-Barthélemy variants seen in production
  'saint barthelemy': 'BL',
  'saint barth': 'BL',
  'st barth': 'BL',
  'st barths': 'BL',
  'st barthelemy': 'BL',
  // Caribbean short-forms whose canonical Intl names are awkward
  // ("Antigua and Barbuda", "Saint Vincent and the Grenadines", etc.).
  antigua: 'AG',
  barbuda: 'AG',
  'st kitts': 'KN',
  'saint kitts': 'KN',
  nevis: 'KN',
};
/**
 * High-frequency cities → country, used as a last-resort fallback when
 * exact / alias / fuzzy country matching all miss. Keys are normalized.
 *
 * Order matters: an entry's key is also matched as a substring of the
 * input ("Sag Harbor Y" contains "sag harbor"), so the most specific
 * city appears first to avoid a wrong partial hit.
 */
const CITY_TO_COUNTRY: Record<string, CountryCode> = {
  'kansas city': 'US',
  'sag harbor': 'US',
  'new york': 'US',
  // Cities that came out unresolved from the 2026-05-03 NocoDB dry-run.
  // Using lowercase (post-normalize keys).
  boston: 'US',
  tampa: 'US',
  'fort lauderdale': 'US',
  'port jefferson': 'US',
  nantucket: 'US',
  // US state abbreviations that often appear standalone or as suffix:
  // NOTE(review): `normalizeForLookup` trims its input, so a standalone
  // "FL" normalizes to "fl" and can never match these space-prefixed
  // keys — they only fire as suffixes ("miami fl"). Confirm intended.
  // NOTE(review): the substring loop in `resolveCountry` means e.g.
  // " cannes" contains " ca" and " flats" contains " fl" — watch for
  // false positives on these two-letter keys.
  ' fl': 'US',
  ' ma': 'US',
  ' ny': 'US',
  ' tx': 'US',
  ' ca': 'US',
  // International
  london: 'GB',
  paris: 'FR',
};
/** How confident `resolveCountry` is in the returned ISO code. */
export type CountryConfidence = 'exact' | 'fuzzy' | 'city';
/** Resolution result; null iso means "unresolved — flag for review". */
export interface ResolvedCountry {
  iso: CountryCode | null;
  confidence: CountryConfidence | null;
}
/**
 * Map free-text country / region input to an ISO-3166-1 alpha-2 code.
 *
 * Lookup order: alias → exact (vs. all locale country names) → city →
 * fuzzy (Levenshtein ≤ 2). Anything beyond fuzzy returns a null iso and
 * the migration script flags the row for human review.
 */
export function resolveCountry(text: string | null | undefined): ResolvedCountry {
  if (text == null) return { iso: null, confidence: null };
  const needle = normalizeForLookup(String(text));
  if (!needle) return { iso: null, confidence: null };
  // 1. Aliases — covers USA / UK / St Barth and friends.
  const aliased = COUNTRY_ALIASES[needle];
  if (aliased) return { iso: aliased, confidence: 'exact' };
  // 2. Exact match against Intl-derived names, both sides normalized so
  //    'United States' and 'united states' meet on the same shape.
  for (const code of ALL_COUNTRY_CODES) {
    if (normalizeForLookup(getCountryName(code, 'en')) === needle) {
      return { iso: code, confidence: 'exact' };
    }
  }
  // 3. City → country fallback: exact key first, then substring.
  const directCity = CITY_TO_COUNTRY[needle];
  if (directCity) return { iso: directCity, confidence: 'city' };
  for (const [city, iso] of Object.entries(CITY_TO_COUNTRY)) {
    if (needle.includes(city)) return { iso, confidence: 'city' };
  }
  // 4. Fuzzy fallback (Levenshtein ≤ 2). Short inputs are skipped: a
  //    4-char string like "Mars" sits within distance 2 of several short
  //    country names (Mali, Laos, Iran, …).
  if (needle.length >= 6) {
    let bestCode: CountryCode | null = null;
    let bestDistance = Number.POSITIVE_INFINITY;
    for (const code of ALL_COUNTRY_CODES) {
      const distance = levenshtein(normalizeForLookup(getCountryName(code, 'en')), needle);
      if (distance < bestDistance) {
        bestDistance = distance;
        bestCode = code;
        if (distance === 0) break;
      }
    }
    if (bestCode && bestDistance <= 2) {
      return { iso: bestCode, confidence: 'fuzzy' };
    }
  }
  return { iso: null, confidence: null };
}
/** Lowercase + strip diacritics + replace hyphens/dots with spaces +
 * collapse whitespace. Used by both the input and the canonical-name
 * side of the country comparison so they meet on the same shape. */
function normalizeForLookup(s: string): string {
  return (
    s
      .normalize('NFD')
      // U+0300–U+036F is the Unicode combining-diacritics block. Written
      // as escapes rather than literal combining characters so the range
      // survives editors / diff tools that mangle invisible codepoints.
      .replace(/[\u0300-\u036f]/g, '')
      .toLowerCase()
      .replace(/[-.]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim()
  );
}
// ─── Levenshtein ────────────────────────────────────────────────────────────
/**
 * Iterative Levenshtein edit distance using one rolling row plus a
 * diagonal carry — O(n) space, O(n*m) time. Shared by the country fuzzy
 * match and the dedup name-similarity rule; exported so find-matches can
 * reuse it without an external dep. Callers keep inputs short via the
 * dedup blocking strategy.
 */
export function levenshtein(a: string, b: string): number {
  if (a === b) return 0;
  if (!a) return b.length;
  if (!b) return a.length;
  // row[j] = distance between a[0..i) and the first j chars of b for the
  // current i; `diag` carries what row[j-1] held on the previous pass.
  const row = Array.from({ length: b.length + 1 }, (_, j) => j);
  for (let i = 1; i <= a.length; i += 1) {
    let diag = row[0]!;
    row[0] = i;
    for (let j = 1; j <= b.length; j += 1) {
      const above = row[j]!;
      const substitution = diag + (a[i - 1] === b[j - 1] ? 0 : 1);
      row[j] = Math.min(above + 1, row[j - 1]! + 1, substitution);
      diag = above;
    }
  }
  return row[b.length]!;
}

View File

@@ -0,0 +1,66 @@
/**
* Script-safe phone parser.
*
* The project's existing `src/lib/i18n/phone.ts` imports from
* `libphonenumber-js`, which under Node 25 + tsx loader hits a
* metadata-shape interop bug (`{ default }` wrapping the JSON). It
* works fine in Next.js + vitest, but a `tsx scripts/...` invocation
* blows up.
*
* This wrapper bypasses the bundled `index.cjs.js` and calls
* `libphonenumber-js/core` directly with metadata loaded as raw JSON.
* Same surface as the i18n helper; usable from both runtimes.
*
* Used by the dedup library's `normalizePhone`. The runtime UI still
* imports `i18n/phone` directly — no reason to touch a working path.
*/
// eslint-disable-next-line @typescript-eslint/no-require-imports
const core: typeof import('libphonenumber-js/core') = require('libphonenumber-js/core');
// Load the JSON directly. The bundled `index.cjs.js` does the same
// thing but its `require('../metadata.min.json')` hits a Node 25 ESM
// interop bug that wraps the JSON in `{ default }`. Importing the
// JSON file by absolute path through the package root sidesteps it.
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-explicit-any
const metadata: any = require('libphonenumber-js/metadata.min.json');
import type { CountryCode } from '@/lib/i18n/countries';
export interface ParsedPhone {
  /** Canonical E.164 string, or null when nothing parsed. */
  e164: string | null;
  /** ISO alpha-2 country the number resolved to, when known. */
  country: CountryCode | null;
  national: string | null;
  international: string | null;
  isValid: boolean;
}
/** Shared "nothing parsed" result — every field null / false. */
const EMPTY: ParsedPhone = {
  e164: null,
  country: null,
  national: null,
  international: null,
  isValid: false,
};
/**
 * Parse a phone string through libphonenumber-js's core entry with raw
 * metadata (see module header for why the bundled entry is bypassed).
 * Never throws: blank, unparseable, or crashing input yields EMPTY.
 */
export function parsePhoneScriptSafe(raw: string, defaultCountry?: CountryCode): ParsedPhone {
  const input = raw.trim();
  if (!input) return EMPTY;
  try {
    // Core expects its own `CountryCode` type; ours is the same set of
    // ISO alpha-2 codes (re-derived from the same Intl source), so the
    // cast is structurally equivalent, not lossy.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = core.parsePhoneNumberFromString(input, defaultCountry as any, metadata);
    if (!result) return EMPTY;
    return {
      e164: result.number,
      country: (result.country ?? null) as CountryCode | null,
      national: result.formatNational(),
      international: result.formatInternational(),
      isValid: result.isValid(),
    };
  } catch {
    return EMPTY;
  }
}

View File

@@ -0,0 +1,393 @@
/**
* Client merge service — atomically combines two client records.
*
* Used by:
* - /admin/duplicates review queue (when an admin confirms a merge)
* - the at-create suggestion path ("use existing client") — though
* that path uses the lighter `attachInterestToClient` and never
* actually merges two pre-existing clients
* - the migration script's `--apply` (eventually)
*
* Reversibility: every merge writes a `client_merge_log` row containing
* the loser's full pre-merge state. Within the configured undo window
* (default 7 days, see `dedup_undo_window_days` in system_settings) a
* follow-up `unmergeClients` call can restore the loser and detach
* everything that was reattached.
*
* Design reference: docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §6.
*/
import { and, eq, sql } from 'drizzle-orm';
import { db } from '@/lib/db';
import {
clients,
clientContacts,
clientAddresses,
clientNotes,
clientTags,
clientRelationships,
clientMergeLog,
clientMergeCandidates,
} from '@/lib/db/schema/clients';
import { interests } from '@/lib/db/schema/interests';
import { berthReservations } from '@/lib/db/schema/reservations';
import { auditLogs } from '@/lib/db/schema/system';
// ─── Public API ─────────────────────────────────────────────────────────────
export interface MergeFieldChoices {
  /** Per-field overrides — `winner` keeps the surviving client's value;
   * `loser` copies the loser's value over. Fields not listed default
   * to `winner` (no change). */
  fullName?: 'winner' | 'loser';
  nationalityIso?: 'winner' | 'loser';
  preferredContactMethod?: 'winner' | 'loser';
  preferredLanguage?: 'winner' | 'loser';
  timezone?: 'winner' | 'loser';
  source?: 'winner' | 'loser';
  sourceDetails?: 'winner' | 'loser';
}
export interface MergeOptions {
  /** Surviving client — receives everything reattached from the loser. */
  winnerId: string;
  /** Client to be archived and redirected into the winner. */
  loserId: string;
  /** ID of the user performing the merge (for audit + clientMergeLog.mergedBy). */
  mergedBy: string;
  /** Per-field choice overrides. Multi-value fields (contacts, addresses,
   * notes, tags) are always preserved from both sides; this only
   * affects single-value scalar fields on the `clients` row. */
  fieldChoices?: MergeFieldChoices;
}
export interface MergeResult {
  /** Id of the merge-log row recording the loser's pre-merge snapshot. */
  mergeLogId: string;
  /** Per-table counts of rows reattached from the loser to the winner. */
  movedRows: {
    interests: number;
    contacts: number;
    addresses: number;
    notes: number;
    tags: number;
    relationships: number;
    reservations: number;
  };
}
/**
* Atomically merge `loserId` into `winnerId`. Throws if:
* - either id doesn't exist or belongs to a different port
* - the loser has already been merged (mergedIntoClientId set)
* - the winner is itself archived
*/
export async function mergeClients(opts: MergeOptions): Promise<MergeResult> {
if (opts.winnerId === opts.loserId) {
throw new Error('Cannot merge a client into itself');
}
return await db.transaction(async (tx) => {
// ── Lock both rows for the duration. The first FOR UPDATE that
// arrives wins; a concurrent second merge of the same loser
// will see `mergedIntoClientId` set and bail. ──────────────────────
const [winnerRow] = await tx
.select()
.from(clients)
.where(eq(clients.id, opts.winnerId))
.for('update');
const [loserRow] = await tx
.select()
.from(clients)
.where(eq(clients.id, opts.loserId))
.for('update');
if (!winnerRow) throw new Error(`Winner client ${opts.winnerId} not found`);
if (!loserRow) throw new Error(`Loser client ${opts.loserId} not found`);
if (winnerRow.portId !== loserRow.portId) {
throw new Error('Cannot merge clients across different ports');
}
if (loserRow.mergedIntoClientId) {
throw new Error(`Loser ${opts.loserId} already merged into ${loserRow.mergedIntoClientId}`);
}
if (winnerRow.archivedAt) {
throw new Error('Cannot merge into an archived client');
}
// ── Snapshot the loser's full state before any mutation. Used by
// `unmergeClients` to restore within the undo window. ──────────────
const loserContacts = await tx
.select()
.from(clientContacts)
.where(eq(clientContacts.clientId, opts.loserId));
const loserAddresses = await tx
.select()
.from(clientAddresses)
.where(eq(clientAddresses.clientId, opts.loserId));
const loserNotes = await tx
.select()
.from(clientNotes)
.where(eq(clientNotes.clientId, opts.loserId));
const loserTags = await tx
.select()
.from(clientTags)
.where(eq(clientTags.clientId, opts.loserId));
const loserInterests = await tx
.select({ id: interests.id })
.from(interests)
.where(eq(interests.clientId, opts.loserId));
const loserReservations = await tx
.select({ id: berthReservations.id })
.from(berthReservations)
.where(eq(berthReservations.clientId, opts.loserId));
const loserRelationshipsAsA = await tx
.select()
.from(clientRelationships)
.where(eq(clientRelationships.clientAId, opts.loserId));
const loserRelationshipsAsB = await tx
.select()
.from(clientRelationships)
.where(eq(clientRelationships.clientBId, opts.loserId));
const snapshot = {
loser: loserRow,
contacts: loserContacts,
addresses: loserAddresses,
notes: loserNotes,
tags: loserTags,
interests: loserInterests.map((r) => r.id),
reservations: loserReservations.map((r) => r.id),
relationshipsAsA: loserRelationshipsAsA,
relationshipsAsB: loserRelationshipsAsB,
fieldChoices: opts.fieldChoices ?? {},
mergedAt: new Date().toISOString(),
};
// ── Apply field choices on the winner. We only touch fields the
// caller explicitly asked to copy from the loser; everything
// else stays as-is. ────────────────────────────────────────────────
const fieldUpdates: Partial<typeof winnerRow> = {};
if (opts.fieldChoices?.fullName === 'loser') fieldUpdates.fullName = loserRow.fullName;
if (opts.fieldChoices?.nationalityIso === 'loser')
fieldUpdates.nationalityIso = loserRow.nationalityIso;
if (opts.fieldChoices?.preferredContactMethod === 'loser')
fieldUpdates.preferredContactMethod = loserRow.preferredContactMethod;
if (opts.fieldChoices?.preferredLanguage === 'loser')
fieldUpdates.preferredLanguage = loserRow.preferredLanguage;
if (opts.fieldChoices?.timezone === 'loser') fieldUpdates.timezone = loserRow.timezone;
if (opts.fieldChoices?.source === 'loser') fieldUpdates.source = loserRow.source;
if (opts.fieldChoices?.sourceDetails === 'loser')
fieldUpdates.sourceDetails = loserRow.sourceDetails;
if (Object.keys(fieldUpdates).length > 0) {
await tx
.update(clients)
.set({ ...fieldUpdates, updatedAt: new Date() })
.where(eq(clients.id, opts.winnerId));
}
// ── Reattach. Each table that points at the loser via clientId
// gets pointed at the winner instead. ─────────────────────────────
const movedInterests = (
await tx
.update(interests)
.set({ clientId: opts.winnerId, updatedAt: new Date() })
.where(eq(interests.clientId, opts.loserId))
.returning({ id: interests.id })
).length;
const movedReservations = (
await tx
.update(berthReservations)
.set({ clientId: opts.winnerId, updatedAt: new Date() })
.where(eq(berthReservations.clientId, opts.loserId))
.returning({ id: berthReservations.id })
).length;
// Contacts: move loser's contacts to winner, but DON'T duplicate any
// already-present (channel, value) pair. Loser-only ones get
// demoted to non-primary so the winner's primary stays intact.
const winnerContacts = await tx
.select({ channel: clientContacts.channel, value: clientContacts.value })
.from(clientContacts)
.where(eq(clientContacts.clientId, opts.winnerId));
const winnerContactKeys = new Set(
winnerContacts.map((c) => `${c.channel}::${c.value.toLowerCase()}`),
);
let movedContacts = 0;
for (const c of loserContacts) {
const key = `${c.channel}::${c.value.toLowerCase()}`;
if (winnerContactKeys.has(key)) {
// Winner already has this contact — drop loser's row (cascade
// will clean up when loser is archived). But we keep snapshot
// so undo restores it.
continue;
}
await tx
.update(clientContacts)
.set({ clientId: opts.winnerId, isPrimary: false, updatedAt: new Date() })
.where(eq(clientContacts.id, c.id));
movedContacts += 1;
}
// Addresses: same shape as contacts, but uniqueness is harder to
// detect cleanly (free-text street). Just move them all and let the
// user dedupe in the UI later.
const movedAddresses = (
await tx
.update(clientAddresses)
.set({ clientId: opts.winnerId, isPrimary: false, updatedAt: new Date() })
.where(eq(clientAddresses.clientId, opts.loserId))
.returning({ id: clientAddresses.id })
).length;
const movedNotes = (
await tx
.update(clientNotes)
.set({ clientId: opts.winnerId, updatedAt: new Date() })
.where(eq(clientNotes.clientId, opts.loserId))
.returning({ id: clientNotes.id })
).length;
// Tags: copy any loser-only tag to the winner; drop overlap.
const winnerTags = await tx
.select({ tagId: clientTags.tagId })
.from(clientTags)
.where(eq(clientTags.clientId, opts.winnerId));
const winnerTagSet = new Set(winnerTags.map((t) => t.tagId));
let movedTags = 0;
for (const t of loserTags) {
if (!winnerTagSet.has(t.tagId)) {
await tx.insert(clientTags).values({ clientId: opts.winnerId, tagId: t.tagId });
movedTags += 1;
}
}
await tx.delete(clientTags).where(eq(clientTags.clientId, opts.loserId));
// Relationships: rewrite each FK side to point at the winner. Keep
// both sides regardless — even if A and B both end up as the same
// person, the row is preserved for audit; the UI hides self-loops.
const movedRelationships =
(
await tx
.update(clientRelationships)
.set({ clientAId: opts.winnerId })
.where(eq(clientRelationships.clientAId, opts.loserId))
.returning({ id: clientRelationships.id })
).length +
(
await tx
.update(clientRelationships)
.set({ clientBId: opts.winnerId })
.where(eq(clientRelationships.clientBId, opts.loserId))
.returning({ id: clientRelationships.id })
).length;
// ── Archive the loser. Row stays in DB for the undo window;
// `mergedIntoClientId` is the redirect pointer for any stragglers
// (links / direct queries / saved views). ──────────────────────────
await tx
.update(clients)
.set({
archivedAt: new Date(),
mergedIntoClientId: opts.winnerId,
updatedAt: new Date(),
})
.where(eq(clients.id, opts.loserId));
// ── Mark any open merge candidate row for this pair as resolved. ───
await tx
.update(clientMergeCandidates)
.set({
status: 'merged',
resolvedAt: new Date(),
resolvedBy: opts.mergedBy,
})
.where(
and(
eq(clientMergeCandidates.portId, winnerRow.portId),
// pair stored in canonical order — match either direction
sql`(
(${clientMergeCandidates.clientAId} = ${opts.winnerId}
AND ${clientMergeCandidates.clientBId} = ${opts.loserId})
OR
(${clientMergeCandidates.clientAId} = ${opts.loserId}
AND ${clientMergeCandidates.clientBId} = ${opts.winnerId})
)`,
),
);
// ── Write the merge log + audit log. ────────────────────────────────
const [logRow] = await tx
.insert(clientMergeLog)
.values({
portId: winnerRow.portId,
survivingClientId: opts.winnerId,
mergedClientId: opts.loserId,
mergedBy: opts.mergedBy,
mergeDetails: snapshot,
})
.returning({ id: clientMergeLog.id });
await tx.insert(auditLogs).values({
portId: winnerRow.portId,
userId: opts.mergedBy,
entityType: 'client',
entityId: opts.winnerId,
action: 'merge',
newValue: {
loserId: opts.loserId,
loserName: loserRow.fullName,
movedInterests,
movedReservations,
movedContacts,
movedAddresses,
},
});
return {
mergeLogId: logRow!.id,
movedRows: {
interests: movedInterests,
contacts: movedContacts,
addresses: movedAddresses,
notes: movedNotes,
tags: movedTags,
relationships: movedRelationships,
reservations: movedReservations,
},
};
});
}
// ─── Convenience: list merge candidates for a port ──────────────────────────

/**
 * One pending duplicate-client pair, as surfaced to the admin review queue
 * by {@link listPendingMergeCandidates}.
 */
export interface MergeCandidatePair {
  /** Primary key of the underlying merge-candidate row. */
  id: string;
  /** First client in the pair (stored in canonical order in the DB). */
  clientAId: string;
  /** Second client in the pair. */
  clientBId: string;
  /** Match score; the queue is ordered by this, highest first. */
  score: number;
  /** Human-readable reasons the pair was flagged; empty when none recorded. */
  reasons: string[];
  /** Raw candidate status — always 'pending' for rows from the listing query. */
  status: string;
  /** When the candidate row was created. */
  createdAt: Date;
}
/**
 * Fetch pending merge candidate pairs for the admin review queue.
 *
 * Returns only rows with status 'pending' for the given port, ordered by
 * score descending so the most likely duplicates surface first. The raw
 * `reasons` column is coerced to a string array (empty when absent or
 * malformed) so callers never have to null-check it.
 *
 * @param portId - Port whose candidates to list.
 * @returns Candidate pairs, highest score first.
 */
export async function listPendingMergeCandidates(portId: string): Promise<MergeCandidatePair[]> {
  // Build the filter once: scoped to this port, unresolved rows only.
  const pendingForPort = and(
    eq(clientMergeCandidates.portId, portId),
    eq(clientMergeCandidates.status, 'pending'),
  );

  const rows = await db
    .select()
    .from(clientMergeCandidates)
    .where(pendingForPort)
    .orderBy(sql`${clientMergeCandidates.score} DESC`);

  return rows.map((row) => {
    // Defensive: `reasons` is stored as JSON; fall back to [] if it isn't an array.
    const reasons = Array.isArray(row.reasons) ? (row.reasons as string[]) : [];
    return {
      id: row.id,
      clientAId: row.clientAId,
      clientBId: row.clientBId,
      score: row.score,
      reasons,
      status: row.status,
      createdAt: row.createdAt,
    };
  });
}