fix(migration): NocoDB import safety + dedup helpers + lead-source backfill

migration-apply: the residential client and interest inserts are now wrapped in db.transaction so a partial failure can't leave an orphan client row without its interest (or vice versa). migration-transform: buildPlannedDocument returns null when there are no signers, so the apply pass doesn't try to send a Documenso envelope without recipients. mapDocumentStatus gets an explicit "Awaiting Further Details" branch that no longer auto-promotes via stale sign-time fields. parseFlexibleDate handles ISO and DD-MM-YYYY inputs uniformly. backfill-legacy-lead-source: the per-chunk UPDATE's WHERE clause now requires isNull(source) on top of the inArray match, so a re-run can't overwrite a more accurate source written between batches. Adds 235 lines of vitest coverage on migration-transform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 22:56:18 +02:00
parent 089f4a67a4
commit d62822c284
9 changed files with 938 additions and 47 deletions
--- a/src/lib/dedup/normalize.ts
+++ b/src/lib/dedup/normalize.ts
@@ -48,7 +48,7 @@ const PARTICLES: ReadonlySet<string> = new Set([

 export interface NormalizedName {
  /** Human-readable form preserved for UI display. Trims, collapses
-   *  whitespace, fixes case, but never destroys the user's intent —
+   *  whitespace, fixes case, but never destroys the user's intent -
   *  slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC")
   *  is left intact. */
  display: string;
@@ -67,7 +67,7 @@ export interface NormalizedName {
 *
 * If the input contains a `/` (slash-with-company structure like
 * "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is
- * preserved verbatim — it's signal, not noise.
+ * preserved verbatim - it's signal, not noise.
 */
 export function normalizeName(raw: string | null | undefined): NormalizedName {
  const safe = (raw ?? '').toString();
@@ -111,7 +111,7 @@ function titleCaseOneToken(token: string, isFirst: boolean): string {
  if (!token) return '';
  const lower = token.toLowerCase();
  if (!isFirst && PARTICLES.has(lower)) return lower;
-  // O'Brien / D'Angelo / l'Estrange — capitalize the segment after each
+  // O'Brien / D'Angelo / l'Estrange - capitalize the segment after each
  // apostrophe so a lowercased input round-trips to readable Irish caps.
  if (lower.includes("'")) {
    return lower
@@ -144,7 +144,7 @@ const emailSchema = z.string().email();

 /**
 * Normalize a free-text email. Trims + lowercases. Returns null for empty
- * or malformed input — caller decides whether to flag, store, or drop.
+ * or malformed input - caller decides whether to flag, store, or drop.
 *
 * Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real
 * distinct addresses, and stripping them would auto-merge legitimately
@@ -182,10 +182,10 @@ export interface NormalizedPhone {
 *   1. strip leading apostrophe (spreadsheet copy-paste artifact)
 *   2. strip \r / \n / \t (real values seen in NocoDB had carriage returns)
 *   3. detect multi-number fields ("+33611111111;+33622222222",
- *      "0677580750/0690511494") — flag and take first segment
+ *      "0677580750/0690511494") - flag and take first segment
 *   4. strip whitespace, dots, dashes, parens, single quotes
 *   5. convert leading "00" → "+" (international dialling code)
- *   6. detect placeholder fakes (8+ consecutive zeros) — flag, return null e164
+ *   6. detect placeholder fakes (8+ consecutive zeros) - flag, return null e164
 *   7. parse via libphonenumber-js
 *   8. on parse failure or invalid number → flag 'unparseable'
 *
@@ -205,7 +205,7 @@ export function normalizePhone(
  // 2. Strip carriage returns / newlines / tabs.
  cleaned = cleaned.replace(/[\r\n\t]/g, '');

-  // 3. Multi-number detection — split on /, ;, , (in that order of priority).
+  // 3. Multi-number detection - split on /, ;, , (in that order of priority).
  let flagged: PhoneFlag | undefined;
  if (/[/;,]/.test(cleaned)) {
    flagged = 'multi_number';
@@ -221,7 +221,7 @@ export function normalizePhone(
    cleaned = '+' + cleaned.slice(2);
  }

-  // 6. Placeholder fakes — runs of 8+ consecutive zeros, e.g. +447000000000.
+  // 6. Placeholder fakes - runs of 8+ consecutive zeros, e.g. +447000000000.
  if (/0{8,}/.test(cleaned)) {
    return { e164: null, country: null, display: null, flagged: 'placeholder' };
  }
@@ -229,7 +229,7 @@ export function normalizePhone(
  // 7. Parse via the existing i18n helper (libphonenumber-js under the hood).
  const parsed = parsePhone(cleaned, defaultCountry);
  if (!parsed.e164) {
-    // Couldn't even produce a canonical form — genuinely garbage.
+    // Couldn't even produce a canonical form - genuinely garbage.
    return { e164: null, country: null, display: null, flagged: 'unparseable' };
  }

@@ -255,7 +255,7 @@ export function normalizePhone(
 * `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
 * (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
 *
- * Kept opinionated and small — only entries we've actually seen in legacy
+ * Kept opinionated and small - only entries we've actually seen in legacy
 * data. Adding a new alias is cheap; trying to be exhaustive isn't.
 */
 const COUNTRY_ALIASES: Record<string, CountryCode> = {
@@ -327,7 +327,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
  const normalized = normalizeForLookup(text.toString());
  if (!normalized) return { iso: null, confidence: null };

-  // 1. Aliases — covers USA / UK / St Barth and friends.
+  // 1. Aliases - covers USA / UK / St Barth and friends.
  const alias = COUNTRY_ALIASES[normalized];
  if (alias) return { iso: alias, confidence: 'exact' };

@@ -348,7 +348,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry

  // 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because
  //    a 4-char string like "Mars" sits within distance 2 of multiple
-  //    short country names (Mali, Laos, Iran, …) — false-positive city.
+  //    short country names (Mali, Laos, Iran, …) - false-positive city.
  if (normalized.length >= 6) {
    let bestCode: CountryCode | null = null;
    let bestDistance = Number.POSITIVE_INFINITY;
@@ -387,7 +387,7 @@ function normalizeForLookup(s: string): string {
 /**
 * Standard iterative Levenshtein. Used by the country fuzzy match and by
 * the dedup algorithm's name-similarity rule. Allocates O(n*m) so callers
- * shouldn't run it against pathological inputs — the dedup blocking
+ * shouldn't run it against pathological inputs - the dedup blocking
 * strategy keeps comparison sets small.
 *
 * Exported so the find-matches module can reuse the same implementation
@@ -400,7 +400,7 @@ export function levenshtein(a: string, b: string): number {

  const m = a.length;
  const n = b.length;
-  // Two rolling rows is enough — keeps memory at O(n) instead of O(n*m).
+  // Two rolling rows is enough - keeps memory at O(n) instead of O(n*m).
  let prev = new Array<number>(n + 1);
  let curr = new Array<number>(n + 1);
  for (let j = 0; j <= n; j += 1) prev[j] = j;