fix(migration): NocoDB import safety + dedup helpers + lead-source backfill

migration-apply: residential client + interest inserts now wrap in
db.transaction so a partial failure can't leave an orphan client
row without its interest (or vice versa).

migration-transform: buildPlannedDocument returns null when there
are no signers so the apply pass doesn't try to send a Documenso
envelope without recipients. mapDocumentStatus gets an explicit
"Awaiting Further Details" branch that no longer auto-promotes via
stale sign-time fields. parseFlexibleDate handles ISO and DD-MM-YYYY
inputs uniformly.

backfill-legacy-lead-source: chunk UPDATE WHERE clause now adds
isNull(source) on top of the inArray match, so a re-run can't
overwrite a more accurate source written between batches.

Adds 235 lines of vitest coverage on migration-transform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-05-04 22:56:18 +02:00
parent 089f4a67a4
commit d62822c284
9 changed files with 938 additions and 47 deletions

View File

@@ -1,5 +1,5 @@
/**
* Client-match finder pure scoring logic.
* Client-match finder - pure scoring logic.
*
* Compares one input candidate against a pool of existing candidates and
* returns scored matches. Used by:
@@ -31,7 +31,7 @@ export interface MatchCandidate {
emails: string[];
/** Already canonical E.164 via `normalizePhone`. */
phonesE164: string[];
/** Address country (NOT phone country) used for tiebreaking, not scoring. */
/** Address country (NOT phone country) - used for tiebreaking, not scoring. */
countryIso: string | null;
}
@@ -59,7 +59,7 @@ export interface DedupThresholds {
/**
* Compare `input` against every reachable candidate in `pool` and return
* scored matches, sorted by score descending. The result list includes
* low-confidence hits caller filters by `confidence` or `score`
* low-confidence hits - caller filters by `confidence` or `score`
* depending on use case.
*
* Self-matches (an entry with `id === input.id`, e.g. when re-scoring an
@@ -77,7 +77,7 @@ export function findClientMatches(
// Three indexes mean any candidate that shares ANY of (email / phone /
// surname-token) with the input shows up in the comparison set. Anything
// that shares NONE is structurally too different to be a duplicate and
// is skipped this is what keeps the algorithm O(n) at scale.
// is skipped - this is what keeps the algorithm O(n) at scale.
const byEmail = new Map<string, MatchCandidate[]>();
const byPhone = new Map<string, MatchCandidate[]>();
const bySurnameToken = new Map<string, MatchCandidate[]>();
@@ -165,7 +165,7 @@ function scorePair(a: MatchCandidate, b: MatchCandidate): MatchResult {
}
// Surname + given-name fuzzy. Only fires when names are NOT exactly
// equal avoids double-counting with the rule above. Catches
// equal - avoids double-counting with the rule above. Catches
// 'Constanzo' / 'Costanzo', 'Marc' / 'Marcus' etc. when other contact
// signals confirm them.
if (!nameExactMatch && a.surnameToken && b.surnameToken && a.surnameToken === b.surnameToken) {

View File

@@ -9,7 +9,7 @@
* a partial failure can be resumed by re-running the script. Re-runs
* against an already-applied plan are a near-no-op.
*
* Per-entity transactions (not one giant transaction) the design
* Per-entity transactions (not one giant transaction) - the design
* favours visible partial progress on failure over all-or-nothing.
*
* @see src/lib/dedup/migration-transform.ts for the input shape.
@@ -23,8 +23,16 @@ import { clients, clientContacts, clientAddresses } from '@/lib/db/schema/client
import { interests } from '@/lib/db/schema/interests';
import { yachts } from '@/lib/db/schema/yachts';
import { berths } from '@/lib/db/schema/berths';
import { documents, documentSigners } from '@/lib/db/schema/documents';
import { residentialClients, residentialInterests } from '@/lib/db/schema/residential';
import { migrationSourceLinks } from '@/lib/db/schema/migration';
import type { MigrationPlan, PlannedClient, PlannedInterest } from './migration-transform';
import type {
MigrationPlan,
PlannedClient,
PlannedDocument,
PlannedInterest,
PlannedResidentialClient,
} from './migration-transform';
const SOURCE_SYSTEM = 'nocodb_interests';
@@ -35,7 +43,7 @@ const SOURCE_SYSTEM = 'nocodb_interests';
* unchanged so a literal lookup can still hit (handles the case where
* the legacy data already has the dashed form).
*
* Multi-mooring strings ("A3, D30") return the original string
* Multi-mooring strings ("A3, D30") return the original string -
* those need human review and we don't want to silently pick one half.
*/
function normalizeLegacyMooring(raw: string): string {
@@ -56,13 +64,19 @@ export interface ApplyResult {
yachtsInserted: number;
interestsInserted: number;
interestsSkipped: number;
documentsInserted: number;
documentsSkipped: number;
documentSignersInserted: number;
residentialClientsInserted: number;
residentialClientsSkipped: number;
residentialInterestsInserted: number;
warnings: string[];
}
export interface ApplyOptions {
port: { id: string; slug: string };
applyId: string;
/** Set to true for the "preview the writes" mode runs every read but
/** Set to true for the "preview the writes" mode - runs every read but
* rolls back inserts. Useful for verifying mappings before committing. */
rehearsal?: boolean;
appliedBy?: string;
@@ -74,7 +88,14 @@ export interface ApplyOptions {
*/
async function resolveExistingLink(
sourceId: number,
targetEntityType: 'client' | 'interest' | 'yacht' | 'address',
targetEntityType:
| 'client'
| 'interest'
| 'yacht'
| 'address'
| 'document'
| 'residential_client'
| 'residential_interest',
): Promise<string | null> {
const rows = await db
.select({ id: migrationSourceLinks.targetEntityId })
@@ -109,7 +130,7 @@ async function resolveExistingClusterClient(sourceIds: number[]): Promise<string
return rows[0]?.id ?? null;
}
/** Apply a single PlannedClient returns `{clientId, inserted}` so the
/** Apply a single PlannedClient - returns `{clientId, inserted}` so the
* caller can wire interests against the (possibly pre-existing) record. */
async function applyClient(
planned: PlannedClient,
@@ -125,12 +146,12 @@ async function applyClient(
}
if (opts.rehearsal) {
// Simulate an insert without writing used for the preview path.
// Simulate an insert without writing - used for the preview path.
return { clientId: `rehearsal-${planned.tempId}`, inserted: true };
}
// surnameToken is on the planned object (used by the dedup blocking
// index inside the transform) but not in the clients schema runtime
// index inside the transform) but not in the clients schema - runtime
// dedup re-derives it from fullName when needed. Drop it on insert.
const [inserted] = await db
.insert(clients)
@@ -146,7 +167,7 @@ async function applyClient(
if (!inserted) throw new Error('Client insert returned no row');
const clientId = inserted.id;
// Record idempotency links one per source row in the cluster.
// Record idempotency links - one per source row in the cluster.
await db.insert(migrationSourceLinks).values(
planned.sourceIds.map((sid) => ({
sourceSystem: SOURCE_SYSTEM,
@@ -205,7 +226,7 @@ async function applyClient(
return { clientId, inserted: true };
}
/** Apply a single PlannedInterest looks up its client + berth + yacht and
/** Apply a single PlannedInterest - looks up its client + berth + yacht and
* inserts the interest row, plus a yacht stub if a yacht name is present. */
async function applyInterest(
planned: PlannedInterest,
@@ -224,7 +245,7 @@ async function applyInterest(
const clientId = tempIdToClientId.get(planned.clientTempId);
if (!clientId) {
result.warnings.push(
`Interest source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} skipped`,
`Interest source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} - skipped`,
);
return;
}
@@ -241,7 +262,7 @@ async function applyInterest(
null;
if (!berthId) {
result.warnings.push(
`Interest source=${planned.sourceId} references unknown mooring="${planned.berthMooringNumber}" interest created without berth link`,
`Interest source=${planned.sourceId} references unknown mooring="${planned.berthMooringNumber}" - interest created without berth link`,
);
}
}
@@ -322,6 +343,181 @@ async function applyInterest(
result.interestsInserted += 1;
}
/**
 * Apply a single PlannedDocument - looks up the parent interest's id from
 * the migration ledger, materializes a documents row, and inserts the
 * signer rows. Idempotent via target_entity_type='document'.
 *
 * @param planned          Planned document emitted by the transform.
 * @param tempIdToClientId tempId -> clients.id map built by the client pass.
 * @param opts             Apply options (port, applyId, rehearsal, appliedBy).
 * @param result           Mutable counters + warnings, updated in place.
 */
async function applyDocument(
  planned: PlannedDocument,
  tempIdToClientId: Map<string, string>,
  opts: ApplyOptions,
  result: ApplyResult,
): Promise<void> {
  const existing = await resolveExistingLink(planned.sourceId, 'document');
  if (existing) {
    result.documentsSkipped += 1;
    return;
  }
  // The parent interest must already be applied - its ledger entry (same
  // legacy sourceId, target 'interest') is how the new interest_id is
  // resolved.
  const interestId = await resolveExistingLink(planned.sourceId, 'interest');
  if (!interestId) {
    result.warnings.push(
      `Document source=${planned.sourceId} cannot resolve parent interest - skipped (interest must apply first)`,
    );
    return;
  }
  const clientId = tempIdToClientId.get(planned.clientTempId);
  if (!clientId) {
    result.warnings.push(
      `Document source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} - skipped`,
    );
    return;
  }
  if (opts.rehearsal) {
    result.documentsInserted += 1;
    result.documentSignersInserted += planned.signers.length;
    return;
  }
  // Wrap the document + ledger + signer writes in one transaction. Without
  // it, a failure on the signers insert would leave a ledgered documents
  // row with no signer rows - and because the ledger entry already exists,
  // a re-run would skip the document so the signers would never be
  // backfilled. Mirrors the applyResidentialClient pattern.
  await db.transaction(async (tx) => {
    const [docRow] = await tx
      .insert(documents)
      .values({
        portId: opts.port.id,
        interestId,
        clientId,
        documentType: planned.documentType,
        title: planned.title,
        status: planned.status,
        documensoId: planned.documensoId,
        isManualUpload: false,
        notes: planned.notes,
        createdBy: opts.appliedBy ?? 'migration',
      })
      .returning({ id: documents.id });
    if (!docRow) throw new Error('Document insert returned no row');
    await tx.insert(migrationSourceLinks).values({
      sourceSystem: SOURCE_SYSTEM,
      sourceId: String(planned.sourceId),
      targetEntityType: 'document' as const,
      targetEntityId: docRow.id,
      appliedId: opts.applyId,
      ...(opts.appliedBy ? { appliedBy: opts.appliedBy } : {}),
    });
    if (planned.signers.length > 0) {
      await tx.insert(documentSigners).values(
        planned.signers.map((s) => ({
          documentId: docRow.id,
          signerName: s.signerName,
          signerEmail: s.signerEmail,
          signerRole: s.signerRole,
          signingOrder: s.signingOrder,
          status: s.status,
          signedAt: s.signedAt ? new Date(s.signedAt) : null,
          signingUrl: s.signingUrl,
          embeddedUrl: s.embeddedUrl,
        })),
      );
    }
  });
  // Counters only move once the transaction has committed.
  result.documentsInserted += 1;
  result.documentSignersInserted += planned.signers.length;
}
/**
 * Apply a single PlannedResidentialClient - creates a residential_clients
 * row plus a default residential_interests row at pipeline_stage='new'
 * so the lead surfaces in the residential funnel. Two ledger entries
 * record both targets.
 */
async function applyResidentialClient(
  planned: PlannedResidentialClient,
  opts: ApplyOptions,
  result: ApplyResult,
): Promise<void> {
  // Idempotency probe: both ledger entries are written in the same
  // transaction below, so a hit on the residential_client target alone is
  // proof the whole legacy row was applied.
  if (await resolveExistingLink(planned.sourceId, 'residential_client')) {
    result.residentialClientsSkipped += 1;
    return;
  }
  if (opts.rehearsal) {
    result.residentialClientsInserted += 1;
    result.residentialInterestsInserted += 1;
    return;
  }
  // Legacy data only carries a first-contact date; at import time the last
  // contact is the same moment.
  const firstContact = planned.dateFirstContact ? new Date(planned.dateFirstContact) : null;
  // All three writes commit atomically: a failure part-way through (e.g.
  // on the residential_interests insert) must not strand an unledgered
  // residential_clients row that a later --apply re-run would duplicate.
  await db.transaction(async (tx) => {
    const clientRows = await tx
      .insert(residentialClients)
      .values({
        portId: opts.port.id,
        fullName: planned.fullName,
        email: planned.email,
        phone: planned.phoneE164,
        phoneE164: planned.phoneE164,
        phoneCountry: planned.phoneCountry,
        placeOfResidence: planned.placeOfResidence,
        placeOfResidenceCountryIso: planned.placeOfResidenceCountryIso,
        source: planned.source,
        notes: planned.notes,
        status: 'prospect',
      })
      .returning({ id: residentialClients.id });
    const clientRow = clientRows[0];
    if (!clientRow) throw new Error('Residential client insert returned no row');
    const interestRows = await tx
      .insert(residentialInterests)
      .values({
        portId: opts.port.id,
        residentialClientId: clientRow.id,
        pipelineStage: 'new',
        source: planned.source,
        notes: planned.notes,
        dateFirstContact: firstContact,
        dateLastContact: firstContact,
      })
      .returning({ id: residentialInterests.id });
    const interestRow = interestRows[0];
    if (!interestRow) throw new Error('Residential interest insert returned no row');
    // One ledger entry per target, both keyed on the same legacy sourceId,
    // so re-runs stay idempotent whichever target type is probed.
    const ledgerEntries = (
      [
        ['residential_client', clientRow.id],
        ['residential_interest', interestRow.id],
      ] as const
    ).map(([targetEntityType, targetEntityId]) => ({
      sourceSystem: 'nocodb_residential_interests',
      sourceId: String(planned.sourceId),
      targetEntityType,
      targetEntityId,
      appliedId: opts.applyId,
      ...(opts.appliedBy ? { appliedBy: opts.appliedBy } : {}),
    }));
    await tx.insert(migrationSourceLinks).values(ledgerEntries);
  });
  result.residentialClientsInserted += 1;
  result.residentialInterestsInserted += 1;
}
/**
* Top-level apply driver. Walks the plan once, building the
* tempId→clientId map as it goes, then walks interests with that map.
@@ -336,6 +532,12 @@ export async function applyPlan(plan: MigrationPlan, opts: ApplyOptions): Promis
yachtsInserted: 0,
interestsInserted: 0,
interestsSkipped: 0,
documentsInserted: 0,
documentsSkipped: 0,
documentSignersInserted: 0,
residentialClientsInserted: 0,
residentialClientsSkipped: 0,
residentialInterestsInserted: 0,
warnings: [],
};
@@ -358,5 +560,18 @@ export async function applyPlan(plan: MigrationPlan, opts: ApplyOptions): Promis
await applyInterest(planned, tempIdToClientId, mooringToBerthId, opts, result);
}
// 4. Documents (depend on interests being applied first - applyDocument
// looks up the new interest_id via the migration ledger).
for (const planned of plan.documents) {
await applyDocument(planned, tempIdToClientId, opts, result);
}
// 5. Residential leads - independent domain, no dependency on the marina
// apply phase. Each lead gets a residential_clients row + a default
// residential_interests row.
for (const planned of plan.residentialClients) {
await applyResidentialClient(planned, opts, result);
}
return result;
}

View File

@@ -1,5 +1,5 @@
/**
* Migration report writer turns a `MigrationPlan` (from
* Migration report writer - turns a `MigrationPlan` (from
* `migration-transform.ts`) into a CSV + a human-readable Markdown
* summary on disk under `.migration/<timestamp>/`.
*
@@ -193,7 +193,7 @@ export function buildCsv(plan: MigrationPlan): string {
export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
const s = plan.stats;
const lines: string[] = [];
lines.push(`# Migration Dry-Run ${generatedAt}`);
lines.push(`# Migration Dry-Run - ${generatedAt}`);
lines.push('');
lines.push('## Input');
lines.push(`- ${s.inputInterestRows} NocoDB Interests`);
@@ -204,15 +204,19 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
lines.push(`- ${s.outputInterests} interests (one per source row, linked to deduped client)`);
lines.push(`- ${s.outputContacts} client_contacts`);
lines.push(`- ${s.outputAddresses} client_addresses`);
lines.push(`- ${s.outputDocuments} EOI documents (with ${s.outputDocumentSigners} signers)`);
lines.push(
`- ${s.outputResidentialClients} residential_clients (each with a default-stage residential_interests row)`,
);
lines.push('');
lines.push('## Auto-linked clusters');
if (plan.autoLinks.length === 0) {
lines.push('_None every input row maps to a unique client._');
lines.push('_None - every input row maps to a unique client._');
} else {
for (const link of plan.autoLinks) {
const merged = link.mergedSourceIds.length;
lines.push(
`- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) score ${link.score} via ${link.reasons.join(' + ')}`,
`- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) - score ${link.score} via ${link.reasons.join(' + ')}`,
);
}
}
@@ -223,7 +227,7 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
} else {
for (const pair of plan.needsReview) {
lines.push(
`- Rows \`${pair.aSourceId}\`\`${pair.bSourceId}\` score ${pair.score} (${pair.reasons.join(' + ')})`,
`- Rows \`${pair.aSourceId}\`\`${pair.bSourceId}\` - score ${pair.score} (${pair.reasons.join(' + ')})`,
);
}
}
@@ -243,7 +247,7 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
lines.push('### Detail');
for (const f of plan.flags.slice(0, 30)) {
lines.push(
`- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` \`${JSON.stringify(f.details)}\`` : ''}`,
`- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` - \`${JSON.stringify(f.details)}\`` : ''}`,
);
}
if (plan.flags.length > 30) {

View File

@@ -82,6 +82,63 @@ export interface PlannedInterest {
documensoId: string | null;
}
/**
 * EOI document derived from a legacy NocoDB Interests row that carries a
 * `documensoID`. The apply phase materializes this into one
 * `documents` row plus up to three `document_signers` rows (client / cc /
 * developer), preserving the legacy signing-link URLs and timestamps.
 *
 * Carries the same `sourceId` as the parent interest - apply uses that to
 * resolve the new `interest_id` and `client_id` via
 * `migration_source_links`.
 */
export interface PlannedDocument {
  /** Legacy NocoDB Interests row id (shared with the parent interest). */
  sourceId: number;
  /** tempId of the parent client (used for client_id resolution). */
  clientTempId: string;
  /** Always 'eoi' - this builder only migrates EOI documents. */
  documentType: 'eoi';
  /** Display title, derived from the client's full name. */
  title: string;
  /** new-system document.status. Mapped from the legacy `EOI Status`
   * enum + sign-time fields. */
  status: 'draft' | 'sent' | 'partially_signed' | 'completed';
  /** Legacy Documenso envelope id - non-empty (rows without one emit no
   * PlannedDocument at all). */
  documensoId: string;
  /** Carries legacy S3 paths + a migration provenance line. */
  notes: string | null;
  /** 1-3 signer slots (client / cc / developer); never empty. */
  signers: PlannedDocumentSigner[];
  /** Mirror of interest's dateEoiSent; useful for back-dating createdAt. */
  dateSent: string | null;
}
/**
 * One signer slot on a migrated EOI document. Built from the legacy
 * per-role sign-time / signature-link columns; URLs are preserved verbatim
 * so legacy resume-signing links keep working for in-flight documents.
 */
export interface PlannedDocumentSigner {
  /** Display name. CC/developer slots get placeholder names - the legacy
   * base never stored their identity. */
  signerName: string;
  /** Email address. CC/developer slots get `*@migration.local`
   * placeholders for operators to correct via the UI. */
  signerEmail: string;
  /** Which legacy signer slot this row came from. */
  signerRole: 'client' | 'cc' | 'developer';
  /** Fixed order: client=1, cc=2, developer=3. */
  signingOrder: number;
  /** 'signed' iff a legacy sign-time was present for this slot. */
  status: 'pending' | 'signed';
  /** Parsed legacy sign-time (ISO string) or null when unsigned. */
  signedAt: string | null;
  /** Legacy Documenso signing link, copied verbatim. */
  signingUrl: string | null;
  /** Legacy embedded-signing link, copied verbatim. */
  embeddedUrl: string | null;
}
/**
 * Residential lead from the legacy "Interests (Residences)" table. Pure
 * contact record (no pipeline data in legacy), so apply creates a
 * `residential_clients` row plus a default `residential_interests` row at
 * `pipeline_stage='new'` so it surfaces in the residential funnel.
 */
export interface PlannedResidentialClient {
  /** Legacy residential row id - used as the migration_source_links key. */
  sourceId: number;
  /** Normalized display name; rows without one are flagged and dropped. */
  fullName: string;
  /** Normalized email, or null when missing/invalid (invalid is flagged). */
  email: string | null;
  /** Canonical E.164 phone, or null when missing/unparseable (flagged). */
  phoneE164: string | null;
  /** Country inferred by the phone parser, not the residence field. */
  phoneCountry: CountryCode | null;
  /** Raw (trimmed) legacy "Place of Residence" text. */
  placeOfResidence: string | null;
  /** ISO country resolved from the residence text, when possible. */
  placeOfResidenceCountryIso: CountryCode | null;
  /** Legacy Source mapped through SOURCE_MAP ('manual' when unmapped). */
  source: string | null;
  /** Legacy "Extra Comments", trimmed. */
  notes: string | null;
  /** Parsed legacy creation timestamp, used for dateFirst/LastContact. */
  dateFirstContact: string | null;
}
export interface MigrationFlag {
sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions';
sourceId: number;
@@ -92,6 +149,10 @@ export interface MigrationFlag {
export interface MigrationPlan {
clients: PlannedClient[];
interests: PlannedInterest[];
/** EOI documents derived from interest rows with a `documensoID`. */
documents: PlannedDocument[];
/** Residential leads - physically separate domain, simple 1:1 mapping. */
residentialClients: PlannedResidentialClient[];
flags: MigrationFlag[];
/** Pairs that the migration would auto-link (high score). */
autoLinks: Array<{
@@ -113,6 +174,9 @@ export interface MigrationStats {
outputInterests: number;
outputContacts: number;
outputAddresses: number;
outputDocuments: number;
outputDocumentSigners: number;
outputResidentialClients: number;
flaggedRows: number;
autoLinkedClusters: number;
needsReviewPairs: number;
@@ -193,7 +257,7 @@ function parseFlexibleDate(input: unknown): string | null {
/**
* Run the full transform pipeline against a NocoDB snapshot. Pure
* function same input always produces the same plan.
* function - same input always produces the same plan.
*/
export function transformSnapshot(
snapshot: NocoDbSnapshot,
@@ -214,6 +278,7 @@ export function transformSnapshot(
// Build the planned clients + interests from the clusters.
const clients: PlannedClient[] = [];
const interests: PlannedInterest[] = [];
const documents: PlannedDocument[] = [];
const autoLinks: MigrationPlan['autoLinks'] = [];
const needsReview: MigrationPlan['needsReview'] = [];
@@ -226,10 +291,15 @@ export function transformSnapshot(
const planned = buildPlannedClient(tempId, cluster, opts);
clients.push(planned);
// Each row in the cluster becomes its own interest record.
// Each row in the cluster becomes its own interest record. If the
// legacy row carried a documensoID, also emit an EOI document so the
// /documents view in the new CRM mirrors the legacy signing state.
for (const member of cluster.members) {
const interest = buildPlannedInterest(member.row, tempId);
interests.push(interest);
const doc = buildPlannedDocument(member.row, tempId, planned.fullName);
if (doc) documents.push(doc);
}
if (cluster.members.length > 1) {
@@ -246,9 +316,17 @@ export function transformSnapshot(
}
}
// Residential leads - separate domain, no dedup needed (different team
// sees different rows). One PlannedResidentialClient per source row.
const residentialClients: PlannedResidentialClient[] = snapshot.residentialInterests
.map((row) => buildPlannedResidentialClient(row, opts, flags))
.filter((r): r is PlannedResidentialClient => r !== null);
return {
clients,
interests,
documents,
residentialClients,
flags,
autoLinks,
needsReview,
@@ -259,6 +337,9 @@ export function transformSnapshot(
outputInterests: interests.length,
outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0),
outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0),
outputDocuments: documents.length,
outputDocumentSigners: documents.reduce((sum, d) => sum + d.signers.length, 0),
outputResidentialClients: residentialClients.length,
flaggedRows: flags.length,
autoLinkedClusters: autoLinks.length,
needsReviewPairs: needsReview.length,
@@ -359,7 +440,7 @@ interface Cluster {
function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] {
// Use a union-find structure indexed by row id. Every pair with a
// score >= autoLink threshold gets unioned. Pairs in [needsReview,
// autoLink) accumulate onto the cluster's reviewPairs list they're
// autoLink) accumulate onto the cluster's reviewPairs list - they're
// surfaced for human triage but not auto-merged.
const parent = new Map<string, string>();
for (const r of rows) parent.set(r.candidate.id, r.candidate.id);
@@ -404,7 +485,7 @@ function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[]
}
clusterReasons.set(root, existing);
} else if (m.score >= opts.thresholds.needsReview) {
// Medium track on whichever cluster `left` belongs to.
// Medium - track on whichever cluster `left` belongs to.
const root = find(left.candidate.id);
const list = clusterReviewPairs.get(root) ?? [];
list.push({
@@ -473,7 +554,7 @@ function buildPlannedClient(
): PlannedClient {
const lead = cluster.leadCandidate;
// Collect distinct emails + phones from across the cluster duplicate
// Collect distinct emails + phones from across the cluster - duplicate
// submissions often come with different contact methods we want to
// preserve as multiple rows in `client_contacts`.
const seenEmails = new Set<string>();
@@ -574,3 +655,210 @@ function buildPlannedInterest(row: NocoDbRow, clientTempId: string): PlannedInte
documensoId: (row['documensoID'] as string | undefined) ?? null,
};
}
// ─── EOI document builder ───────────────────────────────────────────────────
/** Map the legacy `EOI Status` SingleSelect onto the new documents.status
 * enum.
 *
 * Precedence: an explicit operator-set enum value always wins; sign-time
 * fields are only consulted when the enum is blank or unrecognized. In
 * particular, "Awaiting Further Details" rows may carry a stale
 * `all_signed_notified_at` from an earlier signing round and must NOT be
 * auto-promoted to 'completed' - for that enum value only "was the EOI
 * sent" matters. */
function mapDocumentStatus(row: NocoDbRow): PlannedDocument['status'] {
  const legacyStatus = (row['EOI Status'] as string | undefined)?.trim();
  switch (legacyStatus) {
    case 'Signed':
      return 'completed';
    case 'Waiting for Signatures':
      return 'partially_signed';
    case 'Awaiting Further Details':
      // Operator explicitly said we're waiting on the client - ignore all
      // sign-time fields, they may be stale.
      return row['EOI Time Sent'] ? 'sent' : 'draft';
    default:
      break;
  }
  // Blank / unrecognized enum: infer from sign-time fields.
  // `all_signed_notified_at` is a strong completed signal even on rows
  // that pre-date the EOI Status enum.
  if (row['all_signed_notified_at'] || row['developerSignTime']) return 'completed';
  if (row['clientSignTime']) return 'partially_signed';
  if (row['EOI Time Sent']) return 'sent';
  return 'draft';
}
/**
* Emit an EOI document plan if the legacy interest row carries a
* `documensoID`. Returns null otherwise - interests without a documensoID
* never had an EOI sent, so there's nothing to migrate.
*/
function buildPlannedDocument(
row: NocoDbRow,
clientTempId: string,
clientFullName: string,
): PlannedDocument | null {
const documensoId = (row['documensoID'] as string | undefined)?.trim();
if (!documensoId) return null;
const status = mapDocumentStatus(row);
const dateSent = parseFlexibleDate(row['EOI Time Sent']);
// Build signers from the three legacy slots. Each slot has its own
// status field (sign-time present = signed). The signing/embedded URLs
// are preserved verbatim so the legacy resume-signing links still work
// for in-flight documents.
const signers: PlannedDocumentSigner[] = [];
const clientEmail = ((row['Email Address'] as string | undefined) ?? '').trim();
if (clientEmail) {
const clientSignedAt = parseFlexibleDate(
row['clientSignTime'] ?? row['client_signed_notified_at'],
);
signers.push({
signerName: clientFullName,
signerEmail: clientEmail,
signerRole: 'client',
signingOrder: 1,
status: clientSignedAt ? 'signed' : 'pending',
signedAt: clientSignedAt,
signingUrl: (row['Signature Link Client'] as string | undefined) ?? null,
embeddedUrl: (row['EmbeddedSignatureLinkClient'] as string | undefined) ?? null,
});
}
const ccLink = (row['Signature Link CC'] as string | undefined) ?? null;
const ccEmbedded = (row['EmbeddedSignatureLinkCC'] as string | undefined) ?? null;
const ccSignedAt = parseFlexibleDate(row['ccSignTime']);
if (ccLink || ccEmbedded || ccSignedAt) {
// Legacy didn't store the CC's email separately - leave a placeholder
// and let the operator update via the UI. Keeping the row preserves
// the link history.
signers.push({
signerName: 'CC (legacy migration)',
signerEmail: 'cc-unknown@migration.local',
signerRole: 'cc',
signingOrder: 2,
status: ccSignedAt ? 'signed' : 'pending',
signedAt: ccSignedAt,
signingUrl: ccLink,
embeddedUrl: ccEmbedded,
});
}
const devSignedAt = parseFlexibleDate(
row['developerSignTime'] ?? row['developer_signed_notified_at'],
);
const devLink = (row['Signature Link Developer'] as string | undefined) ?? null;
const devEmbedded = (row['EmbeddedSignatureLinkDeveloper'] as string | undefined) ?? null;
if (devLink || devEmbedded || devSignedAt) {
signers.push({
signerName: 'Developer (legacy migration)',
signerEmail: 'developer-unknown@migration.local',
signerRole: 'developer',
signingOrder: 3,
status: devSignedAt ? 'signed' : 'pending',
signedAt: devSignedAt,
signingUrl: devLink,
embeddedUrl: devEmbedded,
});
}
// Guard: an EOI document with zero signers leaves the document UI in an
// inconsistent state (status=completed but no rows in document_signers
// means the "who signed" view has nothing to show). Skip the document
// entirely rather than emit an orphaned record. This happens only when
// the legacy row carries a documensoID but lacks an Email Address AND
// has no CC/developer signature data at all - rare, but possible on
// very old rows. The flag is added so the migration report surfaces it
// for human review.
if (signers.length === 0) {
return null;
}
// Stash legacy S3 paths in notes so the reference isn't lost - copying
// attachments into the new files table is a separate workflow.
const notesParts: string[] = [];
const s3Path = (row['S3_Documenso_Path'] as string | undefined)?.trim();
const clientPath = (row['Client_EOI_Document_Path'] as string | undefined)?.trim();
if (s3Path) notesParts.push(`Legacy S3: ${s3Path}`);
if (clientPath) notesParts.push(`Legacy client copy: ${clientPath}`);
notesParts.push(`Migrated from legacy NocoDB Interests row #${row.Id}`);
return {
sourceId: row.Id,
clientTempId,
documentType: 'eoi',
title: `EOI - ${clientFullName}`,
status,
documensoId,
notes: notesParts.join('\n'),
signers,
dateSent,
};
}
// ─── Residential builder ────────────────────────────────────────────────────
/**
 * Map one legacy "Interests (Residences)" row to a PlannedResidentialClient.
 * Returns null (after recording a flag) when the row has no usable name.
 * Unparseable phones and invalid emails are flagged but do not block the
 * row - the contact record is still worth importing.
 */
function buildPlannedResidentialClient(
  row: NocoDbRow,
  opts: TransformOptions,
  flags: MigrationFlag[],
): PlannedResidentialClient | null {
  const name = normalizeName((row['Full Name'] as string | undefined) ?? '');
  // Guard clause: a lead with no name is unusable - flag and drop.
  if (!name.display) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: 'residential row has no name - skipped',
    });
    return null;
  }
  const rawEmail = (row['Email Address'] as string | undefined) ?? '';
  const rawPhone = (row['Phone Number'] as string | undefined) ?? '';
  const rawCountry = (row['Place of Residence'] as string | undefined) ?? '';
  const email = normalizeEmail(rawEmail);
  const country = resolveCountry(rawCountry);
  // Phone parsing falls back to the port's default country when the
  // residence text doesn't resolve to an ISO country.
  const phoneResult = normalizePhone(
    rawPhone,
    (country.iso ?? opts.defaultPhoneCountry) as CountryCode,
  );
  if (rawPhone && !phoneResult?.e164) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: phoneResult?.flagged ? `phone ${phoneResult.flagged}` : 'phone unparseable',
      details: { rawPhone },
    });
  }
  if (rawEmail && !email) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: 'email invalid',
      details: { rawEmail },
    });
  }
  const legacySource = (row['Source'] as string | undefined) ?? null;
  return {
    sourceId: row.Id,
    fullName: name.display,
    email,
    phoneE164: phoneResult?.e164 ?? null,
    phoneCountry: phoneResult?.country ?? null,
    placeOfResidence: rawCountry.trim() || null,
    placeOfResidenceCountryIso: country.iso ?? null,
    source: legacySource ? (SOURCE_MAP[legacySource] ?? 'manual') : null,
    notes: (row['Extra Comments'] as string | undefined)?.trim() ?? null,
    dateFirstContact: parseFlexibleDate(
      row['Time Created'] ?? row['CreatedAt'] ?? row['Created At'],
    ),
  };
}

View File

@@ -7,7 +7,7 @@
*
* Auth: `xc-token` header per NocoDB v2 API.
*
* The shape returned is a verbatim record of the row's fields caller
* The shape returned is a verbatim record of the row's fields - caller
* is responsible for mapping to the new schema via `nocodb-transform.ts`.
*/
@@ -34,7 +34,7 @@ export function loadNocoDbConfig(env: NodeJS.ProcessEnv = process.env): NocoDbCo
// ─── Table identifiers ──────────────────────────────────────────────────────
//
// These IDs are stable per the NocoDB base they were captured during the
// These IDs are stable per the NocoDB base - they were captured during the
// 2026-05-03 audit and won't change unless the base is rebuilt. If the
// base is reset, regenerate them from `getTablesList`.
export const NOCO_TABLES = {
@@ -67,7 +67,7 @@ export type NocoDbRow = Record<string, unknown> & { Id: number };
/**
* Fetch all rows from a NocoDB table. Auto-paginates until the API
* reports `isLastPage`. The legacy base is small (252 Interests rows
* being the largest table) so we keep this simple no streaming.
* being the largest table) so we keep this simple - no streaming.
*/
export async function fetchAllRows(
tableId: string,
@@ -95,7 +95,7 @@ export async function fetchAllRows(
if (!res.ok) {
throw new Error(
`NocoDB fetch failed: ${res.status} ${res.statusText} table ${tableId} page ${page}`,
`NocoDB fetch failed: ${res.status} ${res.statusText} - table ${tableId} page ${page}`,
);
}
@@ -110,7 +110,7 @@ export async function fetchAllRows(
}
/**
* Convenience snapshot pulls every table the migration cares about
* Convenience snapshot - pulls every table the migration cares about
* in parallel. Returned shape is the input the transform layer expects.
*/
export interface NocoDbSnapshot {

View File

@@ -48,7 +48,7 @@ const PARTICLES: ReadonlySet<string> = new Set([
export interface NormalizedName {
/** Human-readable form preserved for UI display. Trims, collapses
* whitespace, fixes case, but never destroys the user's intent
* whitespace, fixes case, but never destroys the user's intent -
* slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC")
* is left intact. */
display: string;
@@ -67,7 +67,7 @@ export interface NormalizedName {
*
* If the input contains a `/` (slash-with-company structure like
* "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is
* preserved verbatim it's signal, not noise.
* preserved verbatim - it's signal, not noise.
*/
export function normalizeName(raw: string | null | undefined): NormalizedName {
const safe = (raw ?? '').toString();
@@ -111,7 +111,7 @@ function titleCaseOneToken(token: string, isFirst: boolean): string {
if (!token) return '';
const lower = token.toLowerCase();
if (!isFirst && PARTICLES.has(lower)) return lower;
// O'Brien / D'Angelo / l'Estrange capitalize the segment after each
// O'Brien / D'Angelo / l'Estrange - capitalize the segment after each
// apostrophe so a lowercased input round-trips to readable Irish caps.
if (lower.includes("'")) {
return lower
@@ -144,7 +144,7 @@ const emailSchema = z.string().email();
/**
* Normalize a free-text email. Trims + lowercases. Returns null for empty
* or malformed input caller decides whether to flag, store, or drop.
* or malformed input - caller decides whether to flag, store, or drop.
*
* Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real
* distinct addresses, and stripping them would auto-merge legitimately
@@ -182,10 +182,10 @@ export interface NormalizedPhone {
* 1. strip leading apostrophe (spreadsheet copy-paste artifact)
* 2. strip \r / \n / \t (real values seen in NocoDB had carriage returns)
* 3. detect multi-number fields ("+33611111111;+33622222222",
* "0677580750/0690511494") flag and take first segment
* "0677580750/0690511494") - flag and take first segment
* 4. strip whitespace, dots, dashes, parens, single quotes
* 5. convert leading "00" → "+" (international dialling code)
* 6. detect placeholder fakes (8+ consecutive zeros) flag, return null e164
* 6. detect placeholder fakes (8+ consecutive zeros) - flag, return null e164
* 7. parse via libphonenumber-js
* 8. on parse failure or invalid number → flag 'unparseable'
*
@@ -205,7 +205,7 @@ export function normalizePhone(
// 2. Strip carriage returns / newlines / tabs.
cleaned = cleaned.replace(/[\r\n\t]/g, '');
// 3. Multi-number detection split on /, ;, , (in that order of priority).
// 3. Multi-number detection - split on /, ;, , (in that order of priority).
let flagged: PhoneFlag | undefined;
if (/[/;,]/.test(cleaned)) {
flagged = 'multi_number';
@@ -221,7 +221,7 @@ export function normalizePhone(
cleaned = '+' + cleaned.slice(2);
}
// 6. Placeholder fakes runs of 8+ consecutive zeros, e.g. +447000000000.
// 6. Placeholder fakes - runs of 8+ consecutive zeros, e.g. +447000000000.
if (/0{8,}/.test(cleaned)) {
return { e164: null, country: null, display: null, flagged: 'placeholder' };
}
@@ -229,7 +229,7 @@ export function normalizePhone(
// 7. Parse via the existing i18n helper (libphonenumber-js under the hood).
const parsed = parsePhone(cleaned, defaultCountry);
if (!parsed.e164) {
// Couldn't even produce a canonical form genuinely garbage.
// Couldn't even produce a canonical form - genuinely garbage.
return { e164: null, country: null, display: null, flagged: 'unparseable' };
}
@@ -255,7 +255,7 @@ export function normalizePhone(
* `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
* (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
*
* Kept opinionated and small only entries we've actually seen in legacy
* Kept opinionated and small - only entries we've actually seen in legacy
* data. Adding a new alias is cheap; trying to be exhaustive isn't.
*/
const COUNTRY_ALIASES: Record<string, CountryCode> = {
@@ -327,7 +327,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
const normalized = normalizeForLookup(text.toString());
if (!normalized) return { iso: null, confidence: null };
// 1. Aliases covers USA / UK / St Barth and friends.
// 1. Aliases - covers USA / UK / St Barth and friends.
const alias = COUNTRY_ALIASES[normalized];
if (alias) return { iso: alias, confidence: 'exact' };
@@ -348,7 +348,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
// 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because
// a 4-char string like "Mars" sits within distance 2 of multiple
// short country names (Mali, Laos, Iran, …) false-positive city.
// short country names (Mali, Laos, Iran, …) - false-positive city.
if (normalized.length >= 6) {
let bestCode: CountryCode | null = null;
let bestDistance = Number.POSITIVE_INFINITY;
@@ -387,7 +387,7 @@ function normalizeForLookup(s: string): string {
/**
* Standard iterative Levenshtein. Used by the country fuzzy match and by
* the dedup algorithm's name-similarity rule. Allocates O(n*m) so callers
* shouldn't run it against pathological inputs the dedup blocking
* shouldn't run it against pathological inputs - the dedup blocking
* strategy keeps comparison sets small.
*
* Exported so the find-matches module can reuse the same implementation
@@ -400,7 +400,7 @@ export function levenshtein(a: string, b: string): number {
const m = a.length;
const n = b.length;
// Two rolling rows is enough keeps memory at O(n) instead of O(n*m).
// Two rolling rows is enough - keeps memory at O(n) instead of O(n*m).
let prev = new Array<number>(n + 1);
let curr = new Array<number>(n + 1);
for (let j = 0; j <= n; j += 1) prev[j] = j;