fix(migration): NocoDB import safety + dedup helpers + lead-source backfill
migration-apply: residential client + interest inserts are now wrapped in db.transaction so a partial failure can't leave an orphan client row without its interest (or vice versa).

migration-transform:
- buildPlannedDocument returns null when there are no signers, so the apply pass doesn't try to send a Documenso envelope without recipients.
- mapDocumentStatus gets an explicit "Awaiting Further Details" branch that no longer auto-promotes via stale sign-time fields.
- parseFlexibleDate handles ISO and DD-MM-YYYY inputs uniformly.

backfill-legacy-lead-source: the chunk UPDATE WHERE clause now adds isNull(source) on top of the inArray match, so a re-run can't overwrite a more accurate source written between batches.

Adds 235 lines of vitest coverage on migration-transform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -48,7 +48,7 @@ const PARTICLES: ReadonlySet<string> = new Set([
|
||||
|
||||
export interface NormalizedName {
|
||||
/** Human-readable form preserved for UI display. Trims, collapses
|
||||
* whitespace, fixes case, but never destroys the user's intent —
|
||||
* whitespace, fixes case, but never destroys the user's intent -
|
||||
* slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC")
|
||||
* is left intact. */
|
||||
display: string;
|
||||
@@ -67,7 +67,7 @@ export interface NormalizedName {
|
||||
*
|
||||
* If the input contains a `/` (slash-with-company structure like
|
||||
* "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is
|
||||
* preserved verbatim — it's signal, not noise.
|
||||
* preserved verbatim - it's signal, not noise.
|
||||
*/
|
||||
export function normalizeName(raw: string | null | undefined): NormalizedName {
|
||||
const safe = (raw ?? '').toString();
|
||||
@@ -111,7 +111,7 @@ function titleCaseOneToken(token: string, isFirst: boolean): string {
|
||||
if (!token) return '';
|
||||
const lower = token.toLowerCase();
|
||||
if (!isFirst && PARTICLES.has(lower)) return lower;
|
||||
// O'Brien / D'Angelo / l'Estrange — capitalize the segment after each
|
||||
// O'Brien / D'Angelo / l'Estrange - capitalize the segment after each
|
||||
// apostrophe so a lowercased input round-trips to readable Irish caps.
|
||||
if (lower.includes("'")) {
|
||||
return lower
|
||||
@@ -144,7 +144,7 @@ const emailSchema = z.string().email();
|
||||
|
||||
/**
|
||||
* Normalize a free-text email. Trims + lowercases. Returns null for empty
|
||||
* or malformed input — caller decides whether to flag, store, or drop.
|
||||
* or malformed input - caller decides whether to flag, store, or drop.
|
||||
*
|
||||
* Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real
|
||||
* distinct addresses, and stripping them would auto-merge legitimately
|
||||
@@ -182,10 +182,10 @@ export interface NormalizedPhone {
|
||||
* 1. strip leading apostrophe (spreadsheet copy-paste artifact)
|
||||
* 2. strip \r / \n / \t (real values seen in NocoDB had carriage returns)
|
||||
* 3. detect multi-number fields ("+33611111111;+33622222222",
|
||||
* "0677580750/0690511494") — flag and take first segment
|
||||
* "0677580750/0690511494") - flag and take first segment
|
||||
* 4. strip whitespace, dots, dashes, parens, single quotes
|
||||
* 5. convert leading "00" → "+" (international dialling code)
|
||||
* 6. detect placeholder fakes (8+ consecutive zeros) — flag, return null e164
|
||||
* 6. detect placeholder fakes (8+ consecutive zeros) - flag, return null e164
|
||||
* 7. parse via libphonenumber-js
|
||||
* 8. on parse failure or invalid number → flag 'unparseable'
|
||||
*
|
||||
@@ -205,7 +205,7 @@ export function normalizePhone(
|
||||
// 2. Strip carriage returns / newlines / tabs.
|
||||
cleaned = cleaned.replace(/[\r\n\t]/g, '');
|
||||
|
||||
// 3. Multi-number detection — split on /, ;, , (in that order of priority).
|
||||
// 3. Multi-number detection - split on /, ;, , (in that order of priority).
|
||||
let flagged: PhoneFlag | undefined;
|
||||
if (/[/;,]/.test(cleaned)) {
|
||||
flagged = 'multi_number';
|
||||
@@ -221,7 +221,7 @@ export function normalizePhone(
|
||||
cleaned = '+' + cleaned.slice(2);
|
||||
}
|
||||
|
||||
// 6. Placeholder fakes — runs of 8+ consecutive zeros, e.g. +447000000000.
|
||||
// 6. Placeholder fakes - runs of 8+ consecutive zeros, e.g. +447000000000.
|
||||
if (/0{8,}/.test(cleaned)) {
|
||||
return { e164: null, country: null, display: null, flagged: 'placeholder' };
|
||||
}
|
||||
@@ -229,7 +229,7 @@ export function normalizePhone(
|
||||
// 7. Parse via the existing i18n helper (libphonenumber-js under the hood).
|
||||
const parsed = parsePhone(cleaned, defaultCountry);
|
||||
if (!parsed.e164) {
|
||||
// Couldn't even produce a canonical form — genuinely garbage.
|
||||
// Couldn't even produce a canonical form - genuinely garbage.
|
||||
return { e164: null, country: null, display: null, flagged: 'unparseable' };
|
||||
}
|
||||
|
||||
@@ -255,7 +255,7 @@ export function normalizePhone(
|
||||
* `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
|
||||
* (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
|
||||
*
|
||||
* Kept opinionated and small — only entries we've actually seen in legacy
|
||||
* Kept opinionated and small - only entries we've actually seen in legacy
|
||||
* data. Adding a new alias is cheap; trying to be exhaustive isn't.
|
||||
*/
|
||||
const COUNTRY_ALIASES: Record<string, CountryCode> = {
|
||||
@@ -327,7 +327,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
|
||||
const normalized = normalizeForLookup(text.toString());
|
||||
if (!normalized) return { iso: null, confidence: null };
|
||||
|
||||
// 1. Aliases — covers USA / UK / St Barth and friends.
|
||||
// 1. Aliases - covers USA / UK / St Barth and friends.
|
||||
const alias = COUNTRY_ALIASES[normalized];
|
||||
if (alias) return { iso: alias, confidence: 'exact' };
|
||||
|
||||
@@ -348,7 +348,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
|
||||
|
||||
// 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because
|
||||
// a 4-char string like "Mars" sits within distance 2 of multiple
|
||||
// short country names (Mali, Laos, Iran, …) — false-positive city.
|
||||
// short country names (Mali, Laos, Iran, …) - false-positive city.
|
||||
if (normalized.length >= 6) {
|
||||
let bestCode: CountryCode | null = null;
|
||||
let bestDistance = Number.POSITIVE_INFINITY;
|
||||
@@ -387,7 +387,7 @@ function normalizeForLookup(s: string): string {
|
||||
/**
|
||||
* Standard iterative Levenshtein. Used by the country fuzzy match and by
|
||||
* the dedup algorithm's name-similarity rule. Runs in O(n*m) time so callers
|
||||
* shouldn't run it against pathological inputs — the dedup blocking
|
||||
* shouldn't run it against pathological inputs - the dedup blocking
|
||||
* strategy keeps comparison sets small.
|
||||
*
|
||||
* Exported so the find-matches module can reuse the same implementation
|
||||
@@ -400,7 +400,7 @@ export function levenshtein(a: string, b: string): number {
|
||||
|
||||
const m = a.length;
|
||||
const n = b.length;
|
||||
// Two rolling rows is enough — keeps memory at O(n) instead of O(n*m).
|
||||
// Two rolling rows is enough - keeps memory at O(n) instead of O(n*m).
|
||||
let prev = new Array<number>(n + 1);
|
||||
let curr = new Array<number>(n + 1);
|
||||
for (let j = 0; j <= n; j += 1) prev[j] = j;
|
||||
|
||||
Reference in New Issue
Block a user