tests/unit/dedup/normalize.test.ts

/**
 * Normalization library — unit tests.
 *
 * Every fixture here comes from real dirty values observed in the legacy
 * NocoDB Interests table during the 2026-05-03 audit (see
 * docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §1.3).
 * The point is regression-prevention: if any of these patterns ever
 * stops normalizing the way it should, dedup quality silently drops.
 */
import { describe, expect, it } from 'vitest';

import {
  normalizeName,
  normalizeEmail,
  normalizePhone,
  resolveCountry,
} from '@/lib/dedup/normalize';

/**
 * normalizeName — unit tests.
 *
 * Each case checks one of the three fields read below: `display` (cleaned,
 * title-cased), `normalized` (lowercase comparison form) and `surnameToken`
 * (lowercase token used for blocking). "#NNN" markers in the comments are
 * the record references quoted alongside each real-data fixture.
 */
describe('normalizeName', () => {
  it('returns empty fields for empty / whitespace-only input', () => {
    // Blank input yields empty strings and an undefined surnameToken — not null.
    const blank = { display: '', normalized: '', surnameToken: undefined };
    expect(normalizeName('')).toEqual(blank);
    expect(normalizeName('   ')).toEqual(blank);
  });

  it('trims leading/trailing whitespace', () => {
    expect(normalizeName('  Marcus Laurent  ')).toMatchObject({
      display: 'Marcus Laurent',
      normalized: 'marcus laurent',
    });
  });

  it('collapses repeated internal whitespace to a single space', () => {
    // From real data: "Arthur  Matthews" (#183), "Corinne  Roche" (#208).
    expect(normalizeName('Arthur  Matthews').display).toBe('Arthur Matthews');
    expect(normalizeName('Corinne   Roche').display).toBe('Corinne Roche');
  });

  it('replaces embedded carriage returns and newlines with single spaces', () => {
    // From real data: "Andrei \nVAGNANOV" (#178), "Daniel\r PRZEDBORSKI" (#175).
    expect(normalizeName('Andrei \nVAGNANOV').display).toBe('Andrei Vagnanov');
    expect(normalizeName('Daniel\r PRZEDBORSKI').display).toBe('Daniel Przedborski');
  });

  it('title-cases ALL-CAPS surnames while keeping given name title-cased', () => {
    // From real data: "Jona ANDERSEN" (#232), "Duane SALTSGAVER" (#227),
    // "Marcos DALLA PRIA" (#165).
    expect(normalizeName('Jona ANDERSEN').display).toBe('Jona Andersen');
    expect(normalizeName('Duane SALTSGAVER').display).toBe('Duane Saltsgaver');
    // Particle 'dalla' stays lowercase mid-name.
    expect(normalizeName('Marcos DALLA PRIA').display).toBe('Marcos dalla Pria');
  });

  it('title-cases lowercased entries', () => {
    // From real data: "antony amaral" (#665), "david rosenbloom" (#239),
    // "john Tickner" (#247).
    expect(normalizeName('antony amaral').display).toBe('Antony Amaral');
    expect(normalizeName('david rosenbloom').display).toBe('David Rosenbloom');
    expect(normalizeName('john Tickner').display).toBe('John Tickner');
  });

  it('keeps Romance and Germanic particles lowercase mid-name', () => {
    // From real data: "Olav van Velsen" (#526). Also synthetic "Carla de la Cruz".
    expect(normalizeName('Olav van Velsen').display).toBe('Olav van Velsen');
    expect(normalizeName('Carla de la Cruz').display).toBe('Carla de la Cruz');
    // Particle-free controls from real data: "Bruno Joyerot" (#18),
    // "OLIVIER DAIN" (#677) — every token must still come out title-cased.
    expect(normalizeName('Bruno Joyerot').display).toBe('Bruno Joyerot');
    expect(normalizeName('OLIVIER DAIN').display).toBe('Olivier Dain');
  });

  // Title uses a plain ASCII apostrophe so `-t "O'-prefixed"` filters match it.
  it("preserves O'-prefixed Irish surnames as title-case", () => {
    expect(normalizeName("liam o'brien").display).toBe("Liam O'Brien");
  });

  it('keeps the slash-with-company structure intact', () => {
    // From real data: "Daniel Wainstein / 7 Knots, LLC" (#637),
    // "Bruno Joyerot / SAS TIKI" (#18).
    expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').display).toBe(
      'Daniel Wainstein / 7 Knots, LLC',
    );
    expect(normalizeName('Bruno Joyerot / SAS TIKI').display).toBe('Bruno Joyerot / SAS TIKI');
  });

  it('exposes the last non-particle token as surnameToken (lowercase) for blocking', () => {
    expect(normalizeName('Marcus Laurent').surnameToken).toBe('laurent');
    expect(normalizeName('Olav van Velsen').surnameToken).toBe('velsen');
    expect(normalizeName('Carla de la Cruz').surnameToken).toBe('cruz');
    expect(normalizeName("Liam O'Brien").surnameToken).toBe("o'brien");
  });

  it('handles single-token names — surnameToken is the only token', () => {
    expect(normalizeName('Madonna').surnameToken).toBe('madonna');
  });

  it('produces a normalized form that is always lowercase', () => {
    expect(normalizeName('Andrei VAGNANOV').normalized).toBe('andrei vagnanov');
    expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').normalized).toBe(
      'daniel wainstein / 7 knots, llc',
    );
  });
});

/**
 * normalizeEmail — unit tests.
 *
 * Asserts trim + lowercase canonicalisation, preservation of plus-aliases,
 * and a null result for blank or malformed input.
 */
describe('normalizeEmail', () => {
  it('returns null for empty / whitespace-only inputs', () => {
    expect(normalizeEmail('')).toBeNull();
    expect(normalizeEmail('   ')).toBeNull();
  });

  it('lowercases and trims', () => {
    // From real data: "Arthur@laser-align.com" vs "arthur@laser-align.com" (#183/#686).
    expect(normalizeEmail('Arthur@laser-align.com')).toBe('arthur@laser-align.com');
    expect(normalizeEmail('  marcus@example.com  ')).toBe('marcus@example.com');
  });

  it('lowercases capitalized localparts', () => {
    // From real data: "Bmalone850@gmail.com" (#489), "Hef355@yahoo.com" (#533),
    // "Donclaytonmusic@gmail.com" (#679). Every cited fixture is asserted.
    expect(normalizeEmail('Bmalone850@gmail.com')).toBe('bmalone850@gmail.com');
    expect(normalizeEmail('Hef355@yahoo.com')).toBe('hef355@yahoo.com');
    expect(normalizeEmail('Donclaytonmusic@gmail.com')).toBe('donclaytonmusic@gmail.com');
  });

  it('preserves plus-aliases — both legitimate and tricks', () => {
    // Per design §3.2: "+aliases" are not stripped. Compare by full localpart.
    expect(normalizeEmail('marcus+sales@example.com')).toBe('marcus+sales@example.com');
  });

  it('returns null for invalid email shapes', () => {
    // No "@", empty localpart, empty domain, domain starting with a dot.
    expect(normalizeEmail('not-an-email')).toBeNull();
    expect(normalizeEmail('@example.com')).toBeNull();
    expect(normalizeEmail('user@')).toBeNull();
    expect(normalizeEmail('user@.com')).toBeNull();
  });
});

/**
 * normalizePhone — unit tests.
 *
 * Each case passes a raw phone string (usually with a default region) and
 * inspects the `e164`, `country` or `flagged` field of the result. Tables are
 * declared `as const` so region codes keep their literal types.
 */
describe('normalizePhone', () => {
  it('returns null for empty / whitespace / null', () => {
    for (const blank of ['', '   ']) {
      expect(normalizePhone(blank, 'AI')).toBeNull();
    }
  });

  it('parses a plain E.164 number', () => {
    const parsed = normalizePhone('+15742740548', 'US');
    expect(parsed).toMatchObject({ e164: '+15742740548', country: 'US' });
  });

  it('strips embedded carriage returns and trailing whitespace', () => {
    // Legacy records #19 and #20 end in a stray "\r":
    // "+1-264-235-8840\r", "+1-264-772-3272\r".
    expect(normalizePhone('+1-264-235-8840\r', 'AI')?.e164).toBe('+12642358840');
  });

  it('strips dashes, dots, parens, single quotes, spaces in a single pass', () => {
    // Rows: spreadsheet apostrophe + dots (#205), bare dashes (#236),
    // synthetic parens/spaces. "+1-264-235-8840" (#19) is the same pattern.
    const punctuated = [
      ["'+1.214.603.4235", '+12146034235'],
      ['574-274-0548', '+15742740548'],
      ['+1 (212) 555-0123', '+12125550123'],
    ] as const;
    for (const [raw, e164] of punctuated) {
      expect(normalizePhone(raw, 'US')?.e164).toBe(e164);
    }
  });

  it('converts a leading 00 prefix to + (international dialling)', () => {
    // Legacy records #216 (UK) and #702 (France).
    const doubleZero = [
      ['00447956657022', 'GB', '+447956657022'],
      ['0033651381036', 'FR', '+33651381036'],
    ] as const;
    for (const [raw, region, e164] of doubleZero) {
      expect(normalizePhone(raw, region)?.e164).toBe(e164);
    }
  });

  it('uses defaultCountry when input has no international prefix', () => {
    // French local-format numbers from records #203 and #701.
    const frenchLocal = [
      ['0690699699', '+33690699699'],
      ['0651381036', '+33651381036'],
    ] as const;
    for (const [raw, e164] of frenchLocal) {
      expect(normalizePhone(raw, 'FR')?.e164).toBe(e164);
    }
  });

  it('returns null when there is no prefix AND no defaultCountry', () => {
    // These are the values the migration script hands to a human reviewer.
    const parsed = normalizePhone('5742740548');
    // Passes for a null result as well as for a result whose e164 is null.
    expect(parsed?.e164 ?? null).toBeNull();
  });

  it('flags placeholder all-zeros numbers and returns null', () => {
    // Record #641 ("Milos Vitkovic") carries "+447000000000" — clearly fake.
    const parsed = normalizePhone('+447000000000', 'GB');
    expect(parsed?.flagged).toBe('placeholder');
    expect(parsed?.e164).toBeNull();
  });

  it('flags multi-number fields and uses the first segment', () => {
    // Record #209 holds "0677580750/0690511494". Other separators: ; ,
    const multiValued = [
      ['0677580750/0690511494', '+33677580750'],
      ['+33611111111;+33622222222', '+33611111111'],
    ] as const;
    for (const [raw, firstSegment] of multiValued) {
      const parsed = normalizePhone(raw, 'FR');
      expect(parsed?.flagged).toBe('multi_number');
      expect(parsed?.e164).toBe(firstSegment);
    }
  });

  it('flags genuinely unparseable input as `unparseable`', () => {
    const parsed = normalizePhone('xyz-not-a-phone', 'US');
    expect(parsed?.flagged).toBe('unparseable');
    expect(parsed?.e164).toBeNull();
  });

  it('strips an apostrophe-prefix without breaking the parse', () => {
    // A leading "'" is what spreadsheets use to stop numeric-cell coercion;
    // it gets copy-pasted along with the number and must not affect dedup.
    const parsed = normalizePhone("'0690699699", 'FR');
    expect(parsed?.e164).toBe('+33690699699');
  });

  it('returns the country alongside the E.164 form', () => {
    const parsed = normalizePhone('+33690699699', 'FR');
    expect(parsed).toMatchObject({ e164: '+33690699699', country: 'FR' });
  });
});

/**
 * resolveCountry — unit tests.
 *
 * Each case passes free-text input and checks the `iso` and/or `confidence`
 * fields of the returned object.
 */
describe('resolveCountry', () => {
  // Result the tests expect whenever nothing can be resolved.
  const unresolved = { iso: null, confidence: null };

  // Checks, in order, that every raw value resolves to its paired ISO code.
  const expectIso = (pairs: ReadonlyArray<readonly [string, string]>): void => {
    for (const [raw, iso] of pairs) {
      expect(resolveCountry(raw).iso).toBe(iso);
    }
  };

  it('returns null for empty / nullish input', () => {
    for (const blank of ['', '   ']) {
      expect(resolveCountry(blank)).toEqual(unresolved);
    }
  });

  it('exact-matches a canonical English country name', () => {
    const canonical = [
      ['Anguilla', 'AI'],
      ['United Kingdom', 'GB'],
      ['United States', 'US'],
    ] as const;
    for (const [name, iso] of canonical) {
      expect(resolveCountry(name)).toEqual({ iso, confidence: 'exact' });
    }
  });

  it('matches case-insensitively', () => {
    expectIso([
      ['anguilla', 'AI'],
      ['UNITED KINGDOM', 'GB'],
    ]);
  });

  it('matches values with surrounding whitespace', () => {
    expectIso([['  United States  ', 'US']]);
  });

  it('handles diacritic variants of Saint-Barthélemy', () => {
    // Spellings seen in the legacy data: "Saint barthelemy" (#203),
    // "St Barth" (#208), "Saint-Barthélemy".
    expectIso([
      ['Saint-Barthélemy', 'BL'],
      ['Saint Barthelemy', 'BL'],
      ['saint barthelemy', 'BL'],
      ['St Barth', 'BL'],
    ]);
  });

  it('resolves common abbreviations', () => {
    expectIso([
      ['USA', 'US'],
      ['UK', 'GB'],
    ]);
  });

  it('falls back to a city → country mapping for high-frequency cities', () => {
    // City names entered in the country field: "Kansas City" (#198),
    // "Sag Harbor Y" (#239).
    expectIso([
      ['Kansas City', 'US'],
      ['Sag Harbor Y', 'US'],
    ]);
  });

  it('marks the confidence tier appropriately', () => {
    const tiers = [
      ['Anguilla', 'exact'],
      ['Kansas City', 'city'],
    ] as const;
    for (const [raw, tier] of tiers) {
      expect(resolveCountry(raw).confidence).toBe(tier);
    }
  });

  it('returns null + null for unresolvable values', () => {
    // The migration script sends these to human review instead of guessing.
    for (const junk of ['asdfghjkl xyz', 'Mars']) {
      expect(resolveCountry(junk)).toEqual(unresolved);
    }
  });
});
feat(dedup): normalization + match-finding library (P1)

The pure-logic spine of the client deduplication system spec'd in
docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md.
Two modules, JSX-free, vitest-tested against fixtures drawn directly
from real dirty values observed in the legacy NocoDB Interests audit.

src/lib/dedup/normalize.ts
- normalizeName: trims whitespace, replaces \r/\n/\t, intelligently
  title-cases ALL-CAPS surnames while keeping particles (van / de /
  dalla / etc.) lowercase mid-name. Preserves Irish O' surnames and
  the "slash-with-company" structure ("Daniel Wainstein / 7 Knots,
  LLC") seen in production. Returns a surnameToken (lowercased last
  non-particle token) for use as a dedup blocking key.
- normalizeEmail: trim + lowercase + zod email validation. Plus-aliases
  preserved; null on invalid.
- normalizePhone: pre-cleans the input (strips spreadsheet apostrophes,
  carriage returns, dots/dashes/parens, converts 00 prefix to +) then
  delegates to libphonenumber-js. Detects multi-number fields ("a/b",
  "a;b") and placeholder fakes (8+ consecutive zeros, e.g.
  +447000000000). Flags every quirk so the migration report and runtime
  audit log can surface it.
- resolveCountry: maps free-text country/region input to ISO-3166-1
  alpha-2 via alias → exact (vs. Intl-derived names) → city → fuzzy
  (Levenshtein ≤ 2). Fuzzy is gated by length so 4-char inputs ("Mars")
  don't false-positive against short country names.
- levenshtein: standard iterative implementation, exported for reuse
  by find-matches.

src/lib/dedup/find-matches.ts
- findClientMatches: builds three blocking indexes off the pool (email
  / phone / surname-token), gathers the comparison set via union, and
  scores each candidate via the rule set in design §4.2:
    Email match            +60
    Phone E.164 match      +50  (≥ 8 digits, excludes placeholder zeros)
    Name exact match       +20
    Surname + given fuzzy  +15  (Levenshtein ≤ 1)
    Negative: shared email but different phone country  −15
    Negative: name match but no shared contact          −20
  Score is clamped to [0,100]. Confidence tier ('high' / 'medium' /
  'low') is derived from configurable thresholds passed in by the
  caller — defaults are highScore=90, mediumScore=50.

tests/unit/dedup/normalize.test.ts (38 cases)
Every dirty-data pattern from design §1.3 has a fixture: carriage
returns in names, ALL-CAPS surnames, lowercase entries, particles,
slash-with-company, plus-aliases, capitalized email localparts,
spreadsheet-apostrophe phones, multi-number phones, placeholder
phones, 00-prefix phones, French/UK local-format phones,
Saint-Barthélemy diacritic variants, Kansas City fallback.

tests/unit/dedup/find-matches.test.ts (12 cases)
Each duplicate cluster from design §1.2 has a test:
- Pattern A (Deepak Ramchandani — pure double-submit) → high
- Pattern B (Howard Wiarda — phone format variance) → high
- Pattern C (Nicolas Ruiz — name capitalization) → high
- Pattern D (Chris/Christopher Allen — name shortening) → high
- Pattern E (Christopher Camazou — typo on resubmit) → high or medium
- Pattern E (Constanzo/Costanzo — surname typo, multi-yacht) → high
- Pattern F (Etiennette Clamouze — same name, different country) →
  must NOT auto-merge
- Pattern F (Bruno+Bruce — shared household contact) → no match
- Negative evidence (same email, different phone country) → medium
- Blocking (no shared keys → 0 matches)
- Sort order (high before low)
- Empty pool

Total: 50 new tests, all green. Zero changes to runtime behavior or
schema; unblocks P2 (runtime surfaces) and P3 (NocoDB migration).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-03 14:28:59 +02:00
+								/**
 								 * Normalization library — unit tests.
 								 *
 								 * Every fixture here comes from real dirty values observed in the legacy
 								 * NocoDB Interests table during the 2026-05-03 audit (see
 								 * docs/superpowers/specs/2026-05-03-dedup-and-migration-design.md §1.3).
 								 * The point is regression-prevention: if any of these patterns ever
 								 * stops normalizing the way it should, dedup quality silently drops.
 								 */
 								import { describe, expect, it } from 'vitest';
 								import {
 								  normalizeName,
 								  normalizeEmail,
 								  normalizePhone,
 								  resolveCountry,
 								} from '@/lib/dedup/normalize';
 								describe('normalizeName', () => {
 								  it('returns null fields for empty / null input', () => {
 								    expect(normalizeName('')).toEqual({ display: '', normalized: '', surnameToken: undefined });
 								    expect(normalizeName('   ')).toEqual({
 								      display: '',
 								      normalized: '',
 								      surnameToken: undefined,
 								    });
 								  });
 								  it('trims leading/trailing whitespace', () => {
 								    expect(normalizeName('  Marcus Laurent  ')).toMatchObject({
 								      display: 'Marcus Laurent',
 								      normalized: 'marcus laurent',
 								    });
 								  });
 								  it('collapses repeated internal whitespace to a single space', () => {
 								    // From real data: "Arthur  Matthews" (#183), "Corinne  Roche" (#208).
 								    expect(normalizeName('Arthur  Matthews').display).toBe('Arthur Matthews');
 								    expect(normalizeName('Corinne   Roche').display).toBe('Corinne Roche');
 								  });
 								  it('replaces embedded carriage returns and newlines with single spaces', () => {
 								    // From real data: "Andrei \nVAGNANOV" (#178), "Daniel\r PRZEDBORSKI" (#175).
 								    expect(normalizeName('Andrei \nVAGNANOV').display).toBe('Andrei Vagnanov');
 								    expect(normalizeName('Daniel\r PRZEDBORSKI').display).toBe('Daniel Przedborski');
 								  });
 								  it('title-cases ALL-CAPS surnames while keeping given name title-cased', () => {
 								    // From real data: "Jona ANDERSEN" (#232), "Duane SALTSGAVER" (#227),
 								    // "Marcos DALLA PRIA" (#165).
 								    expect(normalizeName('Jona ANDERSEN').display).toBe('Jona Andersen');
 								    expect(normalizeName('Duane SALTSGAVER').display).toBe('Duane Saltsgaver');
 								    // Particle 'dalla' stays lowercase mid-name.
 								    expect(normalizeName('Marcos DALLA PRIA').display).toBe('Marcos dalla Pria');
 								  });
 								  it('title-cases lowercased entries', () => {
 								    // From real data: "antony amaral" (#665), "david rosenbloom" (#239),
 								    // "john Tickner" (#247).
 								    expect(normalizeName('antony amaral').display).toBe('Antony Amaral');
 								    expect(normalizeName('david rosenbloom').display).toBe('David Rosenbloom');
 								    expect(normalizeName('john Tickner').display).toBe('John Tickner');
 								  });
 								  it('keeps Romance and Germanic particles lowercase mid-name', () => {
 								    // From real data: "Olav van Velsen" (#526), "Bruno Joyerot" (#18),
 								    // "OLIVIER DAIN" (#677). Also synthetic "Carla de la Cruz".
 								    expect(normalizeName('Olav van Velsen').display).toBe('Olav van Velsen');
 								    expect(normalizeName('Carla de la Cruz').display).toBe('Carla de la Cruz');
 								    expect(normalizeName('OLIVIER DAIN').display).toBe('Olivier Dain');
 								  });
 								  it('preserves O‘-prefixed Irish surnames as title-case', () => {
 								    expect(normalizeName("liam o'brien").display).toBe("Liam O'Brien");
 								  });
 								  it('keeps the slash-with-company structure intact', () => {
 								    // From real data: "Daniel Wainstein / 7 Knots, LLC" (#637),
 								    // "Bruno Joyerot / SAS TIKI" (#18).
 								    expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').display).toBe(
 								      'Daniel Wainstein / 7 Knots, LLC',
 								    );
 								    expect(normalizeName('Bruno Joyerot / SAS TIKI').display).toBe('Bruno Joyerot / SAS TIKI');
 								  });
 								  it('exposes the last non-particle token as surnameToken (lowercase) for blocking', () => {
 								    expect(normalizeName('Marcus Laurent').surnameToken).toBe('laurent');
 								    expect(normalizeName('Olav van Velsen').surnameToken).toBe('velsen');
 								    expect(normalizeName('Carla de la Cruz').surnameToken).toBe('cruz');
 								    expect(normalizeName("Liam O'Brien").surnameToken).toBe("o'brien");
 								  });
 								  it('handles single-token names — surnameToken is the only token', () => {
 								    expect(normalizeName('Madonna').surnameToken).toBe('madonna');
 								  });
 								  it('produces a normalized form that is always lowercase', () => {
 								    expect(normalizeName('Andrei VAGNANOV').normalized).toBe('andrei vagnanov');
 								    expect(normalizeName('Daniel Wainstein / 7 Knots, LLC').normalized).toBe(
 								      'daniel wainstein / 7 knots, llc',
 								    );
 								  });
 								});
 								describe('normalizeEmail', () => {
 								  it('returns null for empty / null inputs', () => {
 								    expect(normalizeEmail('')).toBeNull();
 								    expect(normalizeEmail('   ')).toBeNull();
 								  });
 								  it('lowercases and trims', () => {
 								    // From real data: "Arthur@laser-align.com" vs "arthur@laser-align.com" (#183/#686).
 								    expect(normalizeEmail('Arthur@laser-align.com')).toBe('arthur@laser-align.com');
 								    expect(normalizeEmail('  marcus@example.com  ')).toBe('marcus@example.com');
 								  });
 								  it('lowercases capitalized localparts', () => {
 								    // From real data: "Bmalone850@gmail.com" (#489), "Hef355@yahoo.com" (#533),
 								    // "Donclaytonmusic@gmail.com" (#679).
 								    expect(normalizeEmail('Bmalone850@gmail.com')).toBe('bmalone850@gmail.com');
 								    expect(normalizeEmail('Hef355@yahoo.com')).toBe('hef355@yahoo.com');
 								  });
 								  it('preserves plus-aliases — both legitimate and tricks', () => {
 								    // Per design §3.2: "+aliases" are not stripped. Compare by full localpart.
 								    expect(normalizeEmail('marcus+sales@example.com')).toBe('marcus+sales@example.com');
 								  });
 								  it('returns null for invalid email shapes', () => {
 								    expect(normalizeEmail('not-an-email')).toBeNull();
 								    expect(normalizeEmail('@example.com')).toBeNull();
 								    expect(normalizeEmail('user@')).toBeNull();
 								    expect(normalizeEmail('user@.com')).toBeNull();
 								  });
 								});
 								describe('normalizePhone', () => {
 								  it('returns null for empty / whitespace / null', () => {
 								    expect(normalizePhone('', 'AI')).toBeNull();
 								    expect(normalizePhone('   ', 'AI')).toBeNull();
 								  });
 								  it('parses a plain E.164 number', () => {
 								    expect(normalizePhone('+15742740548', 'US')).toMatchObject({
 								      e164: '+15742740548',
 								      country: 'US',
 								    });
 								  });
 								  it('strips embedded carriage returns and trailing whitespace', () => {
 								    // From real data: "+1-264-235-8840\r" (#19), "+1-264-772-3272\r" (#20).
 								    const out = normalizePhone('+1-264-235-8840\r', 'AI');
 								    expect(out?.e164).toBe('+12642358840');
 								  });
 								  it('strips dashes, dots, parens, single quotes, spaces in a single pass', () => {
 								    // From real data: "'+1.214.603.4235" (#205), "574-274-0548" (#236),
 								    // "+1-264-235-8840" (#19), "+1 (212) 555-0123" (synthetic).
 								    expect(normalizePhone("'+1.214.603.4235", 'US')?.e164).toBe('+12146034235');
 								    expect(normalizePhone('574-274-0548', 'US')?.e164).toBe('+15742740548');
 								    expect(normalizePhone('+1 (212) 555-0123', 'US')?.e164).toBe('+12125550123');
 								  });
 								  it('converts a leading 00 prefix to + (international dialling)', () => {
 								    // From real data: "00447956657022" (#216), "0033651381036" (#702).
 								    expect(normalizePhone('00447956657022', 'GB')?.e164).toBe('+447956657022');
 								    expect(normalizePhone('0033651381036', 'FR')?.e164).toBe('+33651381036');
 								  });
 								  it('uses defaultCountry when input has no international prefix', () => {
 								    // From real data: "0690699699" (#203, French local), "0651381036" (#701).
 								    expect(normalizePhone('0690699699', 'FR')?.e164).toBe('+33690699699');
 								    expect(normalizePhone('0651381036', 'FR')?.e164).toBe('+33651381036');
 								  });
 								  it('returns null when there is no prefix AND no defaultCountry', () => {
 								    // The migration script flags these for human review.
 								    const out = normalizePhone('5742740548');
 								    expect(out?.e164 ?? null).toBeNull();
 								  });
 								  it('flags placeholder all-zeros numbers and returns null', () => {
 								    // From real data: "+447000000000" (#641, "Milos Vitkovic" — clearly fake).
 								    const out = normalizePhone('+447000000000', 'GB');
 								    expect(out?.flagged).toBe('placeholder');
 								    expect(out?.e164).toBeNull();
 								  });
 								  it('flags multi-number fields and uses the first segment', () => {
 								    // From real data: "0677580750/0690511494" (#209). Other separators: ; ,
 								    const slash = normalizePhone('0677580750/0690511494', 'FR');
 								    expect(slash?.flagged).toBe('multi_number');
 								    expect(slash?.e164).toBe('+33677580750');
 								    const semi = normalizePhone('+33611111111;+33622222222', 'FR');
 								    expect(semi?.flagged).toBe('multi_number');
 								    expect(semi?.e164).toBe('+33611111111');
 								  });
 								  it('flags genuinely unparseable input as `unparseable`', () => {
 								    const out = normalizePhone('xyz-not-a-phone', 'US');
 								    expect(out?.flagged).toBe('unparseable');
 								    expect(out?.e164).toBeNull();
 								  });
 								  it('strips an apostrophe-prefix without breaking the parse', () => {
 								    // From real data: leading "'" copy-pasted from spreadsheets escapes
 								    // numeric-cell coercion. Should be invisible to dedup.
 								    expect(normalizePhone("'0690699699", 'FR')?.e164).toBe('+33690699699');
 								  });
 								  it('returns the country alongside the E.164 form', () => {
 								    expect(normalizePhone('+33690699699', 'FR')).toMatchObject({
 								      e164: '+33690699699',
 								      country: 'FR',
 								    });
 								  });
 								});
 								describe('resolveCountry', () => {
 								  it('returns null for empty / nullish input', () => {
 								    expect(resolveCountry('')).toEqual({ iso: null, confidence: null });
 								    expect(resolveCountry('   ')).toEqual({ iso: null, confidence: null });
 								  });
 								  it('exact-matches a canonical English country name', () => {
 								    expect(resolveCountry('Anguilla')).toEqual({ iso: 'AI', confidence: 'exact' });
 								    expect(resolveCountry('United Kingdom')).toEqual({ iso: 'GB', confidence: 'exact' });
 								    expect(resolveCountry('United States')).toEqual({ iso: 'US', confidence: 'exact' });
 								  });
 								  it('matches case-insensitively', () => {
 								    expect(resolveCountry('anguilla').iso).toBe('AI');
 								    expect(resolveCountry('UNITED KINGDOM').iso).toBe('GB');
 								  });
 								  it('matches values with surrounding whitespace', () => {
 								    expect(resolveCountry('  United States  ').iso).toBe('US');
 								  });
 								  it('handles diacritic variants of Saint-Barthélemy', () => {
 								    // From real data: "Saint barthelemy" (#203), "St Barth" (#208), "Saint-Barthélemy".
 								    expect(resolveCountry('Saint-Barthélemy').iso).toBe('BL');
 								    expect(resolveCountry('Saint Barthelemy').iso).toBe('BL');
 								    expect(resolveCountry('saint barthelemy').iso).toBe('BL');
 								    expect(resolveCountry('St Barth').iso).toBe('BL');
 								  });
 								  it('resolves common abbreviations', () => {
 								    expect(resolveCountry('USA').iso).toBe('US');
 								    expect(resolveCountry('UK').iso).toBe('GB');
 								  });
 								  it('falls back to a city → country mapping for high-frequency cities', () => {
 								    // From real data: "Kansas City" (#198), "Sag Harbor Y" (#239).
 								    expect(resolveCountry('Kansas City').iso).toBe('US');
 								    expect(resolveCountry('Sag Harbor Y').iso).toBe('US');
 								  });
 								  it('marks the confidence tier appropriately', () => {
 								    expect(resolveCountry('Anguilla').confidence).toBe('exact');
 								    expect(resolveCountry('Kansas City').confidence).toBe('city');
 								  });
 								  it('returns null + null for unresolvable values', () => {
 								    // Migration script flags these for human review rather than guessing.
 								    expect(resolveCountry('asdfghjkl xyz')).toEqual({ iso: null, confidence: null });
 								    expect(resolveCountry('Mars')).toEqual({ iso: null, confidence: null });
 								  });
 								});