fix(migration): NocoDB import safety + dedup helpers + lead-source backfill

migration-apply: residential client + interest inserts now wrap in
db.transaction so a partial failure can't leave an orphan client
row without its interest (or vice versa).

migration-transform: buildPlannedDocument returns null when there
are no signers so the apply pass doesn't try to send a Documenso
envelope without recipients. mapDocumentStatus gets an explicit
"Awaiting Further Details" branch that no longer auto-promotes via
stale sign-time fields. parseFlexibleDate handles ISO and DD-MM-YYYY
inputs uniformly.

backfill-legacy-lead-source: the chunk UPDATE WHERE clause now adds
isNull(source) on top of the inArray match, so a re-run can't
overwrite a more accurate source written between batches.

Adds 235 lines of vitest coverage on migration-transform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-05-04 22:56:18 +02:00
parent 089f4a67a4
commit d62822c284
9 changed files with 938 additions and 47 deletions

View File

@@ -82,6 +82,63 @@ export interface PlannedInterest {
documensoId: string | null;
}
/**
 * EOI document derived from a legacy NocoDB Interests row that carries a
 * `documensoID`. The apply phase materializes this into one
 * `documents` row plus up to three `document_signers` rows (client / cc /
 * developer), preserving the legacy signing-link URLs and timestamps.
 *
 * Carries the same `sourceId` as the parent interest - apply uses that to
 * resolve the new `interest_id` and `client_id` via
 * `migration_source_links`.
 */
export interface PlannedDocument {
/** `Id` of the legacy Interests row this document was derived from. */
sourceId: number;
/** tempId of the parent client (used for client_id resolution). */
clientTempId: string;
/** Only EOI documents are produced by the migration. */
documentType: 'eoi';
/** Display title; the builder emits `EOI - <client full name>`. */
title: string;
/** new-system document.status. Mapped from the legacy `EOI Status`
 * enum + sign-time fields. */
status: 'draft' | 'sent' | 'partially_signed' | 'completed';
/** Trimmed, non-empty legacy `documensoID` value. */
documensoId: string;
/** Free-text notes. The builder stores the legacy S3 / client-copy paths
 * (when present) plus a provenance line naming the legacy row. */
notes: string | null;
/** Signer rows in client / cc / developer order. The builder never emits
 * a document with an empty list. */
signers: PlannedDocumentSigner[];
/** Mirror of interest's dateEoiSent; useful for back-dating createdAt. */
dateSent: string | null;
}
/**
 * One signer slot of a migrated EOI document. The legacy row has three
 * fixed slots (client, cc, developer); a slot only becomes a signer when
 * the row carries data for it.
 */
export interface PlannedDocumentSigner {
/** Client's full name for the client slot; a fixed
 * "... (legacy migration)" label for the cc / developer slots. */
signerName: string;
/** Trimmed legacy `Email Address` for the client slot; a placeholder
 * `*-unknown@migration.local` address for cc / developer. */
signerEmail: string;
signerRole: 'client' | 'cc' | 'developer';
/** Fixed per role by the builder: client = 1, cc = 2, developer = 3. */
signingOrder: number;
/** 'signed' when a sign timestamp was parsed for the slot, else 'pending'. */
status: 'pending' | 'signed';
/** Parsed sign timestamp, or null while the slot is still pending. */
signedAt: string | null;
/** Legacy `Signature Link <slot>` value, or null when absent. */
signingUrl: string | null;
/** Legacy `EmbeddedSignatureLink<slot>` value, or null when absent. */
embeddedUrl: string | null;
}
/**
 * Residential lead from the legacy "Interests (Residences)" table. Pure
 * contact record (no pipeline data in legacy), so apply creates a
 * `residential_clients` row plus a default `residential_interests` row at
 * `pipeline_stage='new'` so it surfaces in the residential funnel.
 */
export interface PlannedResidentialClient {
/** Legacy residential row id - used as the migration_source_links key. */
sourceId: number;
/** Normalized display name. Rows without a usable name are skipped (and
 * flagged) by the builder, so this is never empty. */
fullName: string;
/** Normalized legacy `Email Address`, or null when missing / rejected. */
email: string | null;
/** Legacy `Phone Number` in E.164 form, or null when it could not be parsed. */
phoneE164: string | null;
/** Country reported by the phone normalizer, or null when there is no
 * parsed phone. */
phoneCountry: CountryCode | null;
/** Trimmed legacy `Place of Residence` text, or null when blank. */
placeOfResidence: string | null;
/** ISO country resolved from `Place of Residence`, or null when unresolved. */
placeOfResidenceCountryIso: CountryCode | null;
/** Legacy `Source` mapped through SOURCE_MAP ('manual' for values without
 * a mapping); null when the legacy row had no source. */
source: string | null;
/** Trimmed legacy `Extra Comments` text, or null when the column is absent. */
notes: string | null;
/** Parsed legacy creation timestamp (`Time Created` / `CreatedAt` /
 * `Created At`), or null when parsing yields nothing. */
dateFirstContact: string | null;
}
export interface MigrationFlag {
sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions';
sourceId: number;
@@ -92,6 +149,10 @@ export interface MigrationFlag {
export interface MigrationPlan {
clients: PlannedClient[];
interests: PlannedInterest[];
/** EOI documents derived from interest rows with a `documensoID`. */
documents: PlannedDocument[];
/** Residential leads - physically separate domain, simple 1:1 mapping. */
residentialClients: PlannedResidentialClient[];
flags: MigrationFlag[];
/** Pairs that the migration would auto-link (high score). */
autoLinks: Array<{
@@ -113,6 +174,9 @@ export interface MigrationStats {
outputInterests: number;
outputContacts: number;
outputAddresses: number;
outputDocuments: number;
outputDocumentSigners: number;
outputResidentialClients: number;
flaggedRows: number;
autoLinkedClusters: number;
needsReviewPairs: number;
@@ -193,7 +257,7 @@ function parseFlexibleDate(input: unknown): string | null {
/**
* Run the full transform pipeline against a NocoDB snapshot. Pure
* function same input always produces the same plan.
* function - same input always produces the same plan.
*/
export function transformSnapshot(
snapshot: NocoDbSnapshot,
@@ -214,6 +278,7 @@ export function transformSnapshot(
// Build the planned clients + interests from the clusters.
const clients: PlannedClient[] = [];
const interests: PlannedInterest[] = [];
const documents: PlannedDocument[] = [];
const autoLinks: MigrationPlan['autoLinks'] = [];
const needsReview: MigrationPlan['needsReview'] = [];
@@ -226,10 +291,15 @@ export function transformSnapshot(
const planned = buildPlannedClient(tempId, cluster, opts);
clients.push(planned);
// Each row in the cluster becomes its own interest record.
// Each row in the cluster becomes its own interest record. If the
// legacy row carried a documensoID, also emit an EOI document so the
// /documents view in the new CRM mirrors the legacy signing state.
for (const member of cluster.members) {
const interest = buildPlannedInterest(member.row, tempId);
interests.push(interest);
const doc = buildPlannedDocument(member.row, tempId, planned.fullName);
if (doc) documents.push(doc);
}
if (cluster.members.length > 1) {
@@ -246,9 +316,17 @@ export function transformSnapshot(
}
}
// Residential leads - separate domain, no dedup needed (different team
// sees different rows). One PlannedResidentialClient per source row.
const residentialClients: PlannedResidentialClient[] = snapshot.residentialInterests
.map((row) => buildPlannedResidentialClient(row, opts, flags))
.filter((r): r is PlannedResidentialClient => r !== null);
return {
clients,
interests,
documents,
residentialClients,
flags,
autoLinks,
needsReview,
@@ -259,6 +337,9 @@ export function transformSnapshot(
outputInterests: interests.length,
outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0),
outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0),
outputDocuments: documents.length,
outputDocumentSigners: documents.reduce((sum, d) => sum + d.signers.length, 0),
outputResidentialClients: residentialClients.length,
flaggedRows: flags.length,
autoLinkedClusters: autoLinks.length,
needsReviewPairs: needsReview.length,
@@ -359,7 +440,7 @@ interface Cluster {
function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] {
// Use a union-find structure indexed by row id. Every pair with a
// score >= autoLink threshold gets unioned. Pairs in [needsReview,
// autoLink) accumulate onto the cluster's reviewPairs list they're
// autoLink) accumulate onto the cluster's reviewPairs list - they're
// surfaced for human triage but not auto-merged.
const parent = new Map<string, string>();
for (const r of rows) parent.set(r.candidate.id, r.candidate.id);
@@ -404,7 +485,7 @@ function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[]
}
clusterReasons.set(root, existing);
} else if (m.score >= opts.thresholds.needsReview) {
// Medium track on whichever cluster `left` belongs to.
// Medium - track on whichever cluster `left` belongs to.
const root = find(left.candidate.id);
const list = clusterReviewPairs.get(root) ?? [];
list.push({
@@ -473,7 +554,7 @@ function buildPlannedClient(
): PlannedClient {
const lead = cluster.leadCandidate;
// Collect distinct emails + phones from across the cluster duplicate
// Collect distinct emails + phones from across the cluster - duplicate
// submissions often come with different contact methods we want to
// preserve as multiple rows in `client_contacts`.
const seenEmails = new Set<string>();
@@ -574,3 +655,210 @@ function buildPlannedInterest(row: NocoDbRow, clientTempId: string): PlannedInte
documensoId: (row['documensoID'] as string | undefined) ?? null,
};
}
// ─── EOI document builder ───────────────────────────────────────────────────
/**
 * Map a legacy Interests row onto the new `documents.status` enum.
 *
 * An explicit legacy `EOI Status` value always takes precedence:
 *  - "Signed"                   -> 'completed'
 *  - "Waiting for Signatures"   -> 'partially_signed'
 *  - "Awaiting Further Details" -> 'sent' when `EOI Time Sent` is set,
 *                                  otherwise 'draft'
 *
 * "Awaiting Further Details" deliberately ignores every sign-time field:
 * such rows can carry a stale `all_signed_notified_at` from an earlier
 * signing round and must not be promoted to 'completed' while the
 * operator is still waiting on the client.
 *
 * Only when the enum is blank (or holds any other value) is the status
 * inferred from the sign-time fields, strongest signal first.
 */
function mapDocumentStatus(row: NocoDbRow): PlannedDocument['status'] {
  const legacyStatus = (row['EOI Status'] as string | undefined)?.trim();
  const wasSent = Boolean(row['EOI Time Sent']);

  switch (legacyStatus) {
    case 'Signed':
      return 'completed';
    case 'Waiting for Signatures':
      return 'partially_signed';
    case 'Awaiting Further Details':
      // Operator-set status wins; sign-time fields are not consulted here.
      return wasSent ? 'sent' : 'draft';
    default:
      break;
  }

  // No usable enum value: infer from timestamps. `all_signed_notified_at`
  // and the developer sign time both indicate a finished document, even
  // on rows that pre-date the EOI Status enum.
  if (row['all_signed_notified_at'] || row['developerSignTime']) {
    return 'completed';
  }
  if (row['clientSignTime']) {
    return 'partially_signed';
  }
  return wasSent ? 'sent' : 'draft';
}
/**
 * Build the EOI document plan for one legacy interest row.
 *
 * Returns null in two cases:
 *  - the row has no (non-blank) `documensoID` - interests without one
 *    never had an EOI sent, so there is nothing to migrate;
 *  - no signer could be derived from the row (no `Email Address` and no
 *    cc / developer link or sign-time data).
 *
 * @param row            Legacy NocoDB Interests row.
 * @param clientTempId   tempId of the planned client that owns this row.
 * @param clientFullName Display name used for the client signer and the title.
 * @returns The planned document, or null when the row is skipped.
 */
function buildPlannedDocument(
  row: NocoDbRow,
  clientTempId: string,
  clientFullName: string,
): PlannedDocument | null {
  const documensoId = (row['documensoID'] as string | undefined)?.trim();
  if (!documensoId) return null;

  const status = mapDocumentStatus(row);
  const dateSent = parseFlexibleDate(row['EOI Time Sent']);

  // Parse each candidate in order and keep the first one that yields a
  // date. Parsing before falling back (rather than `a ?? b` followed by a
  // single parse) means a blank or unparseable primary sign-time column
  // does not hide a usable `*_signed_notified_at` value.
  const firstParsedDate = (...candidates: unknown[]): string | null => {
    for (const candidate of candidates) {
      const parsed = parseFlexibleDate(candidate);
      if (parsed) return parsed;
    }
    return null;
  };

  // Build signers from the three legacy slots. Each slot has its own
  // status field (sign-time present = signed). The signing/embedded URLs
  // are preserved verbatim so the legacy resume-signing links still work
  // for in-flight documents.
  const signers: PlannedDocumentSigner[] = [];

  const clientEmail = ((row['Email Address'] as string | undefined) ?? '').trim();
  if (clientEmail) {
    const clientSignedAt = firstParsedDate(
      row['clientSignTime'],
      row['client_signed_notified_at'],
    );
    signers.push({
      signerName: clientFullName,
      signerEmail: clientEmail,
      signerRole: 'client',
      signingOrder: 1,
      status: clientSignedAt ? 'signed' : 'pending',
      signedAt: clientSignedAt,
      signingUrl: (row['Signature Link Client'] as string | undefined) ?? null,
      embeddedUrl: (row['EmbeddedSignatureLinkClient'] as string | undefined) ?? null,
    });
  }

  const ccLink = (row['Signature Link CC'] as string | undefined) ?? null;
  const ccEmbedded = (row['EmbeddedSignatureLinkCC'] as string | undefined) ?? null;
  const ccSignedAt = parseFlexibleDate(row['ccSignTime']);
  if (ccLink || ccEmbedded || ccSignedAt) {
    // Legacy didn't store the CC's email separately - leave a placeholder
    // and let the operator update via the UI. Keeping the row preserves
    // the link history.
    signers.push({
      signerName: 'CC (legacy migration)',
      signerEmail: 'cc-unknown@migration.local',
      signerRole: 'cc',
      signingOrder: 2,
      status: ccSignedAt ? 'signed' : 'pending',
      signedAt: ccSignedAt,
      signingUrl: ccLink,
      embeddedUrl: ccEmbedded,
    });
  }

  const devSignedAt = firstParsedDate(
    row['developerSignTime'],
    row['developer_signed_notified_at'],
  );
  const devLink = (row['Signature Link Developer'] as string | undefined) ?? null;
  const devEmbedded = (row['EmbeddedSignatureLinkDeveloper'] as string | undefined) ?? null;
  if (devLink || devEmbedded || devSignedAt) {
    // Same placeholder approach as the CC slot: the legacy row has no
    // developer email column.
    signers.push({
      signerName: 'Developer (legacy migration)',
      signerEmail: 'developer-unknown@migration.local',
      signerRole: 'developer',
      signingOrder: 3,
      status: devSignedAt ? 'signed' : 'pending',
      signedAt: devSignedAt,
      signingUrl: devLink,
      embeddedUrl: devEmbedded,
    });
  }

  // Guard: an EOI document with zero signers leaves the document UI in an
  // inconsistent state (status=completed but no rows in document_signers
  // means the "who signed" view has nothing to show). Skip the document
  // entirely rather than emit an orphaned record. This happens only when
  // the legacy row carries a documensoID but lacks an Email Address AND
  // has no CC/developer signature data at all.
  //
  // NOTE(review): the skip is silent - this function has no access to the
  // plan's `flags` list, so nothing is added to the migration report. If
  // these rows should be surfaced for human review, the caller needs to
  // be extended to flag them.
  if (signers.length === 0) {
    return null;
  }

  // Stash legacy S3 paths in notes so the reference isn't lost - copying
  // attachments into the new files table is a separate workflow.
  const notesParts: string[] = [];
  const s3Path = (row['S3_Documenso_Path'] as string | undefined)?.trim();
  const clientPath = (row['Client_EOI_Document_Path'] as string | undefined)?.trim();
  if (s3Path) notesParts.push(`Legacy S3: ${s3Path}`);
  if (clientPath) notesParts.push(`Legacy client copy: ${clientPath}`);
  notesParts.push(`Migrated from legacy NocoDB Interests row #${row.Id}`);

  return {
    sourceId: row.Id,
    clientTempId,
    documentType: 'eoi',
    title: `EOI - ${clientFullName}`,
    status,
    documensoId,
    notes: notesParts.join('\n'),
    signers,
    dateSent,
  };
}
// ─── Residential builder ────────────────────────────────────────────────────
/**
 * Build the planned residential client for one legacy
 * "Interests (Residences)" row.
 *
 * Rows without a usable name are skipped: a flag is pushed and null is
 * returned. Unparseable phone numbers and invalid emails do not skip the
 * row - the field is set to null and a flag is pushed instead.
 *
 * Whitespace-only cells are treated the same as empty cells: they produce
 * null fields and never raise a flag.
 *
 * @param row   Legacy residential row.
 * @param opts  Transform options; `defaultPhoneCountry` is used when the
 *              row's place of residence does not resolve to a country.
 * @param flags Shared flag list; mutated in place.
 * @returns The planned client, or null when the row is skipped.
 */
function buildPlannedResidentialClient(
  row: NocoDbRow,
  opts: TransformOptions,
  flags: MigrationFlag[],
): PlannedResidentialClient | null {
  const rawName = (row['Full Name'] as string | undefined) ?? '';
  const rawEmail = (row['Email Address'] as string | undefined) ?? '';
  const rawPhone = (row['Phone Number'] as string | undefined) ?? '';
  const rawCountry = (row['Place of Residence'] as string | undefined) ?? '';

  const normName = normalizeName(rawName);
  if (!normName.display) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: 'residential row has no name - skipped',
    });
    return null;
  }

  const email = normalizeEmail(rawEmail);
  const country = resolveCountry(rawCountry);
  const phoneCountry = country.iso ?? opts.defaultPhoneCountry;
  const phoneResult = normalizePhone(rawPhone, phoneCountry as CountryCode);

  // Only flag cells that actually contain something. String() keeps the
  // blank check safe if a cell holds a non-string value at runtime.
  const hasPhoneText = String(rawPhone).trim() !== '';
  const hasEmailText = String(rawEmail).trim() !== '';

  if (hasPhoneText && !phoneResult?.e164) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: phoneResult?.flagged ? `phone ${phoneResult.flagged}` : 'phone unparseable',
      details: { rawPhone },
    });
  }
  if (hasEmailText && !email) {
    flags.push({
      sourceTable: 'residential_interests',
      sourceId: row.Id,
      reason: 'email invalid',
      details: { rawEmail },
    });
  }

  // Trim before the lookup so padded values still hit SOURCE_MAP; a blank
  // cell means "no source recorded" (null), not an unmapped one ('manual').
  const sourceFromRow = String((row['Source'] as string | undefined) ?? '').trim();
  const mappedSource = sourceFromRow ? (SOURCE_MAP[sourceFromRow] ?? 'manual') : null;

  // `|| null` (not `??`) so whitespace-only comments become null, matching
  // how placeOfResidence is handled below.
  const extraComments = (row['Extra Comments'] as string | undefined)?.trim() || null;

  // Try each creation-timestamp column in turn and keep the first one that
  // parses, so a blank or malformed earlier column does not mask a valid
  // later one.
  const dateFirstContact =
    parseFlexibleDate(row['Time Created']) ??
    parseFlexibleDate(row['CreatedAt']) ??
    parseFlexibleDate(row['Created At']);

  return {
    sourceId: row.Id,
    fullName: normName.display,
    email,
    phoneE164: phoneResult?.e164 ?? null,
    phoneCountry: phoneResult?.country ?? null,
    placeOfResidence: rawCountry.trim() || null,
    placeOfResidenceCountryIso: country.iso ?? null,
    source: mappedSource,
    notes: extraComments,
    dateFirstContact,
  };
}