fix(migration): NocoDB import safety + dedup helpers + lead-source backfill

migration-apply: residential client + interest inserts now wrap in
db.transaction so a partial failure can't leave an orphan client
row without its interest (or vice versa).

migration-transform: buildPlannedDocument returns null when there
are no signers so the apply pass doesn't try to send a Documenso
envelope without recipients. mapDocumentStatus gets an explicit
"Awaiting Further Details" branch that no longer auto-promotes via
stale sign-time fields. parseFlexibleDate handles ISO and DD-MM-YYYY
inputs uniformly.

backfill-legacy-lead-source: chunk UPDATE WHERE clause now
isNull(source) on top of the inArray match, so a re-run can't
overwrite a more accurate source written between batches.

Adds 235 lines of vitest coverage on migration-transform.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-05-04 22:56:18 +02:00
parent 089f4a67a4
commit d62822c284
9 changed files with 938 additions and 47 deletions

View File

@@ -0,0 +1,135 @@
/**
* One-shot: backfill `interests.source` for legacy NocoDB-imported rows.
*
* Why this exists: the legacy NocoDB Interests table left the `Source`
 * column null for ~95% of rows. The migration mapped null → null, so the
* Lead Source Attribution chart shows them as "Unspecified". Per the
* operator's best knowledge, almost all of those legacy rows came in
* through the website (web form / portal) — the few that didn't are the
* ones that already carry an explicit `Source` value (Form / portal /
* External). Defaulting null → 'website' is therefore the closest
* truth we can reconstruct without per-row sales notes review.
*
* Idempotent: only updates rows where `source IS NULL` AND the row has a
* `migration_source_links` entry tying it back to the legacy NocoDB import,
* so net-new manually-created interests with null source aren't touched.
*
* Usage:
* pnpm tsx scripts/backfill-legacy-lead-source.ts --port-slug port-nimara [--dry-run]
*/
import 'dotenv/config';
import { eq, and, isNull, inArray } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { interests } from '@/lib/db/schema/interests';
import { migrationSourceLinks } from '@/lib/db/schema/migration';
interface CliArgs {
  /** Slug of the port whose interests should be backfilled (required). */
  portSlug: string | null;
  /** When true, report what would change but perform no writes. */
  dryRun: boolean;
}

/**
 * Parse CLI flags for the backfill script.
 *
 * Recognised flags:
 *   --port-slug <slug>  (required) target port
 *   --dry-run           report only, perform no writes
 *   -h | --help         print usage and exit(0)
 *
 * Exits with code 1 when `--port-slug` is missing. Unknown flags are
 * reported on stderr instead of being silently ignored — for a script
 * that mutates data, a typo like `--dryrun` must be visible rather
 * than quietly turning a rehearsal into a live write.
 */
function parseArgs(argv: string[]): CliArgs {
  const args: CliArgs = { portSlug: null, dryRun: false };
  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i]!;
    if (a === '--port-slug') {
      args.portSlug = argv[++i] ?? null;
    } else if (a === '--dry-run') {
      args.dryRun = true;
    } else if (a === '-h' || a === '--help') {
      console.log(
        'Usage: pnpm tsx scripts/backfill-legacy-lead-source.ts --port-slug <slug> [--dry-run]',
      );
      process.exit(0);
    } else {
      // Surface typos instead of swallowing them; still proceed so the
      // required-flag check below produces the authoritative error.
      console.warn(`[backfill] ignoring unknown argument: ${a}`);
    }
  }
  if (!args.portSlug) {
    console.error('Missing required --port-slug');
    process.exit(1);
  }
  return args;
}
/**
 * Entry point: resolve the target port, collect its NULL-source
 * interests, keep only those created by the legacy NocoDB migration
 * (per the migration_source_links ledger), and set their source to
 * 'website' in batches. Honours --dry-run by stopping before writes.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  const portRows = await db
    .select({ id: ports.id, name: ports.name })
    .from(ports)
    .where(eq(ports.slug, args.portSlug!))
    .limit(1);
  const port = portRows[0];
  if (!port) {
    console.error(`No port found with slug "${args.portSlug}"`);
    process.exit(1);
  }
  console.log(`[backfill] target: ${port.name} (${port.id})`);

  // Every interest this port owns whose source is still NULL.
  const nullSourceRows = await db
    .select({ id: interests.id })
    .from(interests)
    .where(and(eq(interests.portId, port.id), isNull(interests.source)));
  console.log(`[backfill] interests with NULL source in this port: ${nullSourceRows.length}`);
  if (nullSourceRows.length === 0) {
    console.log('Nothing to backfill.');
    return;
  }

  // Keep ONLY rows the legacy migration produced — net-new interests
  // where the operator simply hasn't picked a source yet stay NULL.
  const nullSourceIds = nullSourceRows.map((r) => r.id);
  const ledgerRows = await db
    .select({ targetEntityId: migrationSourceLinks.targetEntityId })
    .from(migrationSourceLinks)
    .where(
      and(
        eq(migrationSourceLinks.sourceSystem, 'nocodb_interests'),
        eq(migrationSourceLinks.targetEntityType, 'interest'),
        inArray(migrationSourceLinks.targetEntityId, nullSourceIds),
      ),
    );
  const legacyIdSet = new Set(ledgerRows.map((l) => l.targetEntityId));
  const backfillIds = nullSourceIds.filter((id) => legacyIdSet.has(id));
  console.log(
    `[backfill] of those, ${backfillIds.length} are legacy migration rows (will set source='website')`,
  );
  console.log(
    `[backfill] ${nullSourceRows.length - backfillIds.length} are net-new rows (left untouched)`,
  );

  if (args.dryRun) {
    console.log('[backfill] --dry-run set; no writes.');
    return;
  }
  if (backfillIds.length === 0) {
    console.log('Nothing to write.');
    return;
  }

  // Write in batches of 500 to keep each UPDATE's IN-list a sane size.
  const BATCH = 500;
  let totalUpdated = 0;
  for (let offset = 0; offset < backfillIds.length; offset += BATCH) {
    const batch = backfillIds.slice(offset, offset + BATCH);
    // Re-assert `source IS NULL` in the WHERE so a value written by a
    // concurrent process between our SELECT and this UPDATE is never
    // clobbered with the backfill default.
    const written = await db
      .update(interests)
      .set({ source: 'website' })
      .where(and(inArray(interests.id, batch), isNull(interests.source)))
      .returning({ id: interests.id });
    totalUpdated += written.length;
  }
  console.log(`[backfill] updated ${totalUpdated} rows.`);
}
// Kick the script off; any unhandled failure is fatal and exits non-zero
// so CI / shell callers can detect a broken run.
void (async () => {
  try {
    await main();
  } catch (err) {
    console.error('FATAL', err);
    process.exit(1);
  }
})();

View File

@@ -178,6 +178,12 @@ async function main(): Promise<void> {
); );
console.log(` Output: ${s.outputClients} clients, ${s.outputInterests} interests`); console.log(` Output: ${s.outputClients} clients, ${s.outputInterests} interests`);
console.log(` ${s.outputContacts} contacts, ${s.outputAddresses} addresses`); console.log(` ${s.outputContacts} contacts, ${s.outputAddresses} addresses`);
console.log(
` ${s.outputDocuments} EOI documents, ${s.outputDocumentSigners} signers`,
);
console.log(
` ${s.outputResidentialClients} residential clients (with default-stage interests)`,
);
console.log( console.log(
` Dedup: ${s.autoLinkedClusters} auto-linked clusters, ${s.needsReviewPairs} pairs flagged for review`, ` Dedup: ${s.autoLinkedClusters} auto-linked clusters, ${s.needsReviewPairs} pairs flagged for review`,
); );
@@ -217,6 +223,14 @@ async function main(): Promise<void> {
console.log( console.log(
` Interests: ${result.interestsInserted} inserted, ${result.interestsSkipped} already linked`, ` Interests: ${result.interestsInserted} inserted, ${result.interestsSkipped} already linked`,
); );
console.log(
` Documents: ${result.documentsInserted} inserted, ${result.documentsSkipped} already linked`,
);
console.log(` Signers: ${result.documentSignersInserted} inserted`);
console.log(
` Res-Clt: ${result.residentialClientsInserted} inserted, ${result.residentialClientsSkipped} already linked`,
);
console.log(` Res-Int: ${result.residentialInterestsInserted} inserted`);
if (result.warnings.length > 0) { if (result.warnings.length > 0) {
console.log(''); console.log('');

View File

@@ -1,5 +1,5 @@
/** /**
* Client-match finder pure scoring logic. * Client-match finder - pure scoring logic.
* *
* Compares one input candidate against a pool of existing candidates and * Compares one input candidate against a pool of existing candidates and
* returns scored matches. Used by: * returns scored matches. Used by:
@@ -31,7 +31,7 @@ export interface MatchCandidate {
emails: string[]; emails: string[];
/** Already canonical E.164 via `normalizePhone`. */ /** Already canonical E.164 via `normalizePhone`. */
phonesE164: string[]; phonesE164: string[];
/** Address country (NOT phone country) used for tiebreaking, not scoring. */ /** Address country (NOT phone country) - used for tiebreaking, not scoring. */
countryIso: string | null; countryIso: string | null;
} }
@@ -59,7 +59,7 @@ export interface DedupThresholds {
/** /**
* Compare `input` against every reachable candidate in `pool` and return * Compare `input` against every reachable candidate in `pool` and return
* scored matches, sorted by score descending. The result list includes * scored matches, sorted by score descending. The result list includes
* low-confidence hits caller filters by `confidence` or `score` * low-confidence hits - caller filters by `confidence` or `score`
* depending on use case. * depending on use case.
* *
* Self-matches (an entry with `id === input.id`, e.g. when re-scoring an * Self-matches (an entry with `id === input.id`, e.g. when re-scoring an
@@ -77,7 +77,7 @@ export function findClientMatches(
// Three indexes mean any candidate that shares ANY of (email / phone / // Three indexes mean any candidate that shares ANY of (email / phone /
// surname-token) with the input shows up in the comparison set. Anything // surname-token) with the input shows up in the comparison set. Anything
// that shares NONE is structurally too different to be a duplicate and // that shares NONE is structurally too different to be a duplicate and
// is skipped this is what keeps the algorithm O(n) at scale. // is skipped - this is what keeps the algorithm O(n) at scale.
const byEmail = new Map<string, MatchCandidate[]>(); const byEmail = new Map<string, MatchCandidate[]>();
const byPhone = new Map<string, MatchCandidate[]>(); const byPhone = new Map<string, MatchCandidate[]>();
const bySurnameToken = new Map<string, MatchCandidate[]>(); const bySurnameToken = new Map<string, MatchCandidate[]>();
@@ -165,7 +165,7 @@ function scorePair(a: MatchCandidate, b: MatchCandidate): MatchResult {
} }
// Surname + given-name fuzzy. Only fires when names are NOT exactly // Surname + given-name fuzzy. Only fires when names are NOT exactly
// equal avoids double-counting with the rule above. Catches // equal - avoids double-counting with the rule above. Catches
// 'Constanzo' / 'Costanzo', 'Marc' / 'Marcus' etc. when other contact // 'Constanzo' / 'Costanzo', 'Marc' / 'Marcus' etc. when other contact
// signals confirm them. // signals confirm them.
if (!nameExactMatch && a.surnameToken && b.surnameToken && a.surnameToken === b.surnameToken) { if (!nameExactMatch && a.surnameToken && b.surnameToken && a.surnameToken === b.surnameToken) {

View File

@@ -9,7 +9,7 @@
* a partial failure can be resumed by re-running the script. Re-runs * a partial failure can be resumed by re-running the script. Re-runs
* against an already-applied plan are a near-no-op. * against an already-applied plan are a near-no-op.
* *
* Per-entity transactions (not one giant transaction) the design * Per-entity transactions (not one giant transaction) - the design
* favours visible partial progress on failure over all-or-nothing. * favours visible partial progress on failure over all-or-nothing.
* *
* @see src/lib/dedup/migration-transform.ts for the input shape. * @see src/lib/dedup/migration-transform.ts for the input shape.
@@ -23,8 +23,16 @@ import { clients, clientContacts, clientAddresses } from '@/lib/db/schema/client
import { interests } from '@/lib/db/schema/interests'; import { interests } from '@/lib/db/schema/interests';
import { yachts } from '@/lib/db/schema/yachts'; import { yachts } from '@/lib/db/schema/yachts';
import { berths } from '@/lib/db/schema/berths'; import { berths } from '@/lib/db/schema/berths';
import { documents, documentSigners } from '@/lib/db/schema/documents';
import { residentialClients, residentialInterests } from '@/lib/db/schema/residential';
import { migrationSourceLinks } from '@/lib/db/schema/migration'; import { migrationSourceLinks } from '@/lib/db/schema/migration';
import type { MigrationPlan, PlannedClient, PlannedInterest } from './migration-transform'; import type {
MigrationPlan,
PlannedClient,
PlannedDocument,
PlannedInterest,
PlannedResidentialClient,
} from './migration-transform';
const SOURCE_SYSTEM = 'nocodb_interests'; const SOURCE_SYSTEM = 'nocodb_interests';
@@ -35,7 +43,7 @@ const SOURCE_SYSTEM = 'nocodb_interests';
* unchanged so a literal lookup can still hit (handles the case where * unchanged so a literal lookup can still hit (handles the case where
* the legacy data already has the dashed form). * the legacy data already has the dashed form).
* *
* Multi-mooring strings ("A3, D30") return the original string * Multi-mooring strings ("A3, D30") return the original string -
* those need human review and we don't want to silently pick one half. * those need human review and we don't want to silently pick one half.
*/ */
function normalizeLegacyMooring(raw: string): string { function normalizeLegacyMooring(raw: string): string {
@@ -56,13 +64,19 @@ export interface ApplyResult {
yachtsInserted: number; yachtsInserted: number;
interestsInserted: number; interestsInserted: number;
interestsSkipped: number; interestsSkipped: number;
documentsInserted: number;
documentsSkipped: number;
documentSignersInserted: number;
residentialClientsInserted: number;
residentialClientsSkipped: number;
residentialInterestsInserted: number;
warnings: string[]; warnings: string[];
} }
export interface ApplyOptions { export interface ApplyOptions {
port: { id: string; slug: string }; port: { id: string; slug: string };
applyId: string; applyId: string;
/** Set to true for the "preview the writes" mode runs every read but /** Set to true for the "preview the writes" mode - runs every read but
* rolls back inserts. Useful for verifying mappings before committing. */ * rolls back inserts. Useful for verifying mappings before committing. */
rehearsal?: boolean; rehearsal?: boolean;
appliedBy?: string; appliedBy?: string;
@@ -74,7 +88,14 @@ export interface ApplyOptions {
*/ */
async function resolveExistingLink( async function resolveExistingLink(
sourceId: number, sourceId: number,
targetEntityType: 'client' | 'interest' | 'yacht' | 'address', targetEntityType:
| 'client'
| 'interest'
| 'yacht'
| 'address'
| 'document'
| 'residential_client'
| 'residential_interest',
): Promise<string | null> { ): Promise<string | null> {
const rows = await db const rows = await db
.select({ id: migrationSourceLinks.targetEntityId }) .select({ id: migrationSourceLinks.targetEntityId })
@@ -109,7 +130,7 @@ async function resolveExistingClusterClient(sourceIds: number[]): Promise<string
return rows[0]?.id ?? null; return rows[0]?.id ?? null;
} }
/** Apply a single PlannedClient returns `{clientId, inserted}` so the /** Apply a single PlannedClient - returns `{clientId, inserted}` so the
* caller can wire interests against the (possibly pre-existing) record. */ * caller can wire interests against the (possibly pre-existing) record. */
async function applyClient( async function applyClient(
planned: PlannedClient, planned: PlannedClient,
@@ -125,12 +146,12 @@ async function applyClient(
} }
if (opts.rehearsal) { if (opts.rehearsal) {
// Simulate an insert without writing used for the preview path. // Simulate an insert without writing - used for the preview path.
return { clientId: `rehearsal-${planned.tempId}`, inserted: true }; return { clientId: `rehearsal-${planned.tempId}`, inserted: true };
} }
// surnameToken is on the planned object (used by the dedup blocking // surnameToken is on the planned object (used by the dedup blocking
// index inside the transform) but not in the clients schema runtime // index inside the transform) but not in the clients schema - runtime
// dedup re-derives it from fullName when needed. Drop it on insert. // dedup re-derives it from fullName when needed. Drop it on insert.
const [inserted] = await db const [inserted] = await db
.insert(clients) .insert(clients)
@@ -146,7 +167,7 @@ async function applyClient(
if (!inserted) throw new Error('Client insert returned no row'); if (!inserted) throw new Error('Client insert returned no row');
const clientId = inserted.id; const clientId = inserted.id;
// Record idempotency links one per source row in the cluster. // Record idempotency links - one per source row in the cluster.
await db.insert(migrationSourceLinks).values( await db.insert(migrationSourceLinks).values(
planned.sourceIds.map((sid) => ({ planned.sourceIds.map((sid) => ({
sourceSystem: SOURCE_SYSTEM, sourceSystem: SOURCE_SYSTEM,
@@ -205,7 +226,7 @@ async function applyClient(
return { clientId, inserted: true }; return { clientId, inserted: true };
} }
/** Apply a single PlannedInterest looks up its client + berth + yacht and /** Apply a single PlannedInterest - looks up its client + berth + yacht and
* inserts the interest row, plus a yacht stub if a yacht name is present. */ * inserts the interest row, plus a yacht stub if a yacht name is present. */
async function applyInterest( async function applyInterest(
planned: PlannedInterest, planned: PlannedInterest,
@@ -224,7 +245,7 @@ async function applyInterest(
const clientId = tempIdToClientId.get(planned.clientTempId); const clientId = tempIdToClientId.get(planned.clientTempId);
if (!clientId) { if (!clientId) {
result.warnings.push( result.warnings.push(
`Interest source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} skipped`, `Interest source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} - skipped`,
); );
return; return;
} }
@@ -241,7 +262,7 @@ async function applyInterest(
null; null;
if (!berthId) { if (!berthId) {
result.warnings.push( result.warnings.push(
`Interest source=${planned.sourceId} references unknown mooring="${planned.berthMooringNumber}" interest created without berth link`, `Interest source=${planned.sourceId} references unknown mooring="${planned.berthMooringNumber}" - interest created without berth link`,
); );
} }
} }
@@ -322,6 +343,181 @@ async function applyInterest(
result.interestsInserted += 1; result.interestsInserted += 1;
} }
/**
* Apply a single PlannedDocument - looks up the parent interest's id from
* the migration ledger, materializes a documents row, and inserts the
* signer rows. Idempotent via target_entity_type='document'.
*/
async function applyDocument(
planned: PlannedDocument,
tempIdToClientId: Map<string, string>,
opts: ApplyOptions,
result: ApplyResult,
): Promise<void> {
const existing = await resolveExistingLink(planned.sourceId, 'document');
if (existing) {
result.documentsSkipped += 1;
return;
}
const interestId = await resolveExistingLink(planned.sourceId, 'interest');
if (!interestId) {
result.warnings.push(
`Document source=${planned.sourceId} cannot resolve parent interest - skipped (interest must apply first)`,
);
return;
}
const clientId = tempIdToClientId.get(planned.clientTempId);
if (!clientId) {
result.warnings.push(
`Document source=${planned.sourceId} references unknown client tempId=${planned.clientTempId} - skipped`,
);
return;
}
if (opts.rehearsal) {
result.documentsInserted += 1;
result.documentSignersInserted += planned.signers.length;
return;
}
const [docRow] = await db
.insert(documents)
.values({
portId: opts.port.id,
interestId,
clientId,
documentType: planned.documentType,
title: planned.title,
status: planned.status,
documensoId: planned.documensoId,
isManualUpload: false,
notes: planned.notes,
createdBy: opts.appliedBy ?? 'migration',
})
.returning({ id: documents.id });
if (!docRow) throw new Error('Document insert returned no row');
await db.insert(migrationSourceLinks).values({
sourceSystem: SOURCE_SYSTEM,
sourceId: String(planned.sourceId),
targetEntityType: 'document' as const,
targetEntityId: docRow.id,
appliedId: opts.applyId,
...(opts.appliedBy ? { appliedBy: opts.appliedBy } : {}),
});
if (planned.signers.length > 0) {
await db.insert(documentSigners).values(
planned.signers.map((s) => ({
documentId: docRow.id,
signerName: s.signerName,
signerEmail: s.signerEmail,
signerRole: s.signerRole,
signingOrder: s.signingOrder,
status: s.status,
signedAt: s.signedAt ? new Date(s.signedAt) : null,
signingUrl: s.signingUrl,
embeddedUrl: s.embeddedUrl,
})),
);
result.documentSignersInserted += planned.signers.length;
}
result.documentsInserted += 1;
}
/**
* Apply a single PlannedResidentialClient - creates a residential_clients
* row plus a default residential_interests row at pipeline_stage='new'
* so the lead surfaces in the residential funnel. Two ledger entries
* record both targets.
*/
async function applyResidentialClient(
planned: PlannedResidentialClient,
opts: ApplyOptions,
result: ApplyResult,
): Promise<void> {
const existingClient = await resolveExistingLink(planned.sourceId, 'residential_client');
if (existingClient) {
result.residentialClientsSkipped += 1;
return;
}
if (opts.rehearsal) {
result.residentialClientsInserted += 1;
result.residentialInterestsInserted += 1;
return;
}
// Wrap the three writes in a transaction so a partial failure (e.g. the
// residential_interests insert throws) does NOT leave an orphan
// residential_clients row. Without the wrap, a later --apply re-run
// would not see a ledger entry for the orphan and would happily insert
// a duplicate residential_clients row.
await db.transaction(async (tx) => {
const [resClient] = await tx
.insert(residentialClients)
.values({
portId: opts.port.id,
fullName: planned.fullName,
email: planned.email,
phone: planned.phoneE164,
phoneE164: planned.phoneE164,
phoneCountry: planned.phoneCountry,
placeOfResidence: planned.placeOfResidence,
placeOfResidenceCountryIso: planned.placeOfResidenceCountryIso,
source: planned.source,
notes: planned.notes,
status: 'prospect',
})
.returning({ id: residentialClients.id });
if (!resClient) throw new Error('Residential client insert returned no row');
const [resInterest] = await tx
.insert(residentialInterests)
.values({
portId: opts.port.id,
residentialClientId: resClient.id,
pipelineStage: 'new',
source: planned.source,
notes: planned.notes,
dateFirstContact: planned.dateFirstContact ? new Date(planned.dateFirstContact) : null,
dateLastContact: planned.dateFirstContact ? new Date(planned.dateFirstContact) : null,
})
.returning({ id: residentialInterests.id });
if (!resInterest) throw new Error('Residential interest insert returned no row');
// Two ledger entries - one per target - both keyed on the same legacy
// sourceId. Keeps re-runs idempotent on either target type.
await tx.insert(migrationSourceLinks).values([
{
sourceSystem: 'nocodb_residential_interests',
sourceId: String(planned.sourceId),
targetEntityType: 'residential_client' as const,
targetEntityId: resClient.id,
appliedId: opts.applyId,
...(opts.appliedBy ? { appliedBy: opts.appliedBy } : {}),
},
{
sourceSystem: 'nocodb_residential_interests',
sourceId: String(planned.sourceId),
targetEntityType: 'residential_interest' as const,
targetEntityId: resInterest.id,
appliedId: opts.applyId,
...(opts.appliedBy ? { appliedBy: opts.appliedBy } : {}),
},
]);
});
result.residentialClientsInserted += 1;
result.residentialInterestsInserted += 1;
}
/** /**
* Top-level apply driver. Walks the plan once, building the * Top-level apply driver. Walks the plan once, building the
* tempId→clientId map as it goes, then walks interests with that map. * tempId→clientId map as it goes, then walks interests with that map.
@@ -336,6 +532,12 @@ export async function applyPlan(plan: MigrationPlan, opts: ApplyOptions): Promis
yachtsInserted: 0, yachtsInserted: 0,
interestsInserted: 0, interestsInserted: 0,
interestsSkipped: 0, interestsSkipped: 0,
documentsInserted: 0,
documentsSkipped: 0,
documentSignersInserted: 0,
residentialClientsInserted: 0,
residentialClientsSkipped: 0,
residentialInterestsInserted: 0,
warnings: [], warnings: [],
}; };
@@ -358,5 +560,18 @@ export async function applyPlan(plan: MigrationPlan, opts: ApplyOptions): Promis
await applyInterest(planned, tempIdToClientId, mooringToBerthId, opts, result); await applyInterest(planned, tempIdToClientId, mooringToBerthId, opts, result);
} }
// 4. Documents (depend on interests being applied first - applyDocument
// looks up the new interest_id via the migration ledger).
for (const planned of plan.documents) {
await applyDocument(planned, tempIdToClientId, opts, result);
}
// 5. Residential leads - independent domain, no dependency on the marina
// apply phase. Each lead gets a residential_clients row + a default
// residential_interests row.
for (const planned of plan.residentialClients) {
await applyResidentialClient(planned, opts, result);
}
return result; return result;
} }

View File

@@ -1,5 +1,5 @@
/** /**
* Migration report writer turns a `MigrationPlan` (from * Migration report writer - turns a `MigrationPlan` (from
* `migration-transform.ts`) into a CSV + a human-readable Markdown * `migration-transform.ts`) into a CSV + a human-readable Markdown
* summary on disk under `.migration/<timestamp>/`. * summary on disk under `.migration/<timestamp>/`.
* *
@@ -193,7 +193,7 @@ export function buildCsv(plan: MigrationPlan): string {
export function buildSummary(plan: MigrationPlan, generatedAt: string): string { export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
const s = plan.stats; const s = plan.stats;
const lines: string[] = []; const lines: string[] = [];
lines.push(`# Migration Dry-Run ${generatedAt}`); lines.push(`# Migration Dry-Run - ${generatedAt}`);
lines.push(''); lines.push('');
lines.push('## Input'); lines.push('## Input');
lines.push(`- ${s.inputInterestRows} NocoDB Interests`); lines.push(`- ${s.inputInterestRows} NocoDB Interests`);
@@ -204,15 +204,19 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
lines.push(`- ${s.outputInterests} interests (one per source row, linked to deduped client)`); lines.push(`- ${s.outputInterests} interests (one per source row, linked to deduped client)`);
lines.push(`- ${s.outputContacts} client_contacts`); lines.push(`- ${s.outputContacts} client_contacts`);
lines.push(`- ${s.outputAddresses} client_addresses`); lines.push(`- ${s.outputAddresses} client_addresses`);
lines.push(`- ${s.outputDocuments} EOI documents (with ${s.outputDocumentSigners} signers)`);
lines.push(
`- ${s.outputResidentialClients} residential_clients (each with a default-stage residential_interests row)`,
);
lines.push(''); lines.push('');
lines.push('## Auto-linked clusters'); lines.push('## Auto-linked clusters');
if (plan.autoLinks.length === 0) { if (plan.autoLinks.length === 0) {
lines.push('_None every input row maps to a unique client._'); lines.push('_None - every input row maps to a unique client._');
} else { } else {
for (const link of plan.autoLinks) { for (const link of plan.autoLinks) {
const merged = link.mergedSourceIds.length; const merged = link.mergedSourceIds.length;
lines.push( lines.push(
`- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) score ${link.score} via ${link.reasons.join(' + ')}`, `- Lead row \`${link.leadSourceId}\` ← merged ${merged} other row${merged === 1 ? '' : 's'} (\`${link.mergedSourceIds.join(', ')}\`) - score ${link.score} via ${link.reasons.join(' + ')}`,
); );
} }
} }
@@ -223,7 +227,7 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
} else { } else {
for (const pair of plan.needsReview) { for (const pair of plan.needsReview) {
lines.push( lines.push(
`- Rows \`${pair.aSourceId}\`\`${pair.bSourceId}\` score ${pair.score} (${pair.reasons.join(' + ')})`, `- Rows \`${pair.aSourceId}\`\`${pair.bSourceId}\` - score ${pair.score} (${pair.reasons.join(' + ')})`,
); );
} }
} }
@@ -243,7 +247,7 @@ export function buildSummary(plan: MigrationPlan, generatedAt: string): string {
lines.push('### Detail'); lines.push('### Detail');
for (const f of plan.flags.slice(0, 30)) { for (const f of plan.flags.slice(0, 30)) {
lines.push( lines.push(
`- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` \`${JSON.stringify(f.details)}\`` : ''}`, `- \`${f.sourceTable}#${f.sourceId}\`: ${f.reason}${f.details ? ` - \`${JSON.stringify(f.details)}\`` : ''}`,
); );
} }
if (plan.flags.length > 30) { if (plan.flags.length > 30) {

View File

@@ -82,6 +82,63 @@ export interface PlannedInterest {
documensoId: string | null; documensoId: string | null;
} }
/**
* EOI document derived from a legacy NocoDB Interests row that carries a
* `documensoID`. The apply phase materializes this into one
* `documents` row plus up to three `document_signers` rows (client / cc /
* developer), preserving the legacy signing-link URLs and timestamps.
*
* Carries the same `sourceId` as the parent interest - apply uses that to
* resolve the new `interest_id` and `client_id` via
* `migration_source_links`.
*/
export interface PlannedDocument {
sourceId: number;
/** tempId of the parent client (used for client_id resolution). */
clientTempId: string;
documentType: 'eoi';
title: string;
/** new-system document.status. Mapped from the legacy `EOI Status`
* enum + sign-time fields. */
status: 'draft' | 'sent' | 'partially_signed' | 'completed';
documensoId: string;
notes: string | null;
signers: PlannedDocumentSigner[];
/** Mirror of interest's dateEoiSent; useful for back-dating createdAt. */
dateSent: string | null;
}
export interface PlannedDocumentSigner {
signerName: string;
signerEmail: string;
signerRole: 'client' | 'cc' | 'developer';
signingOrder: number;
status: 'pending' | 'signed';
signedAt: string | null;
signingUrl: string | null;
embeddedUrl: string | null;
}
/**
* Residential lead from the legacy "Interests (Residences)" table. Pure
* contact record (no pipeline data in legacy), so apply creates a
* `residential_clients` row plus a default `residential_interests` row at
* `pipeline_stage='new'` so it surfaces in the residential funnel.
*/
export interface PlannedResidentialClient {
/** Legacy residential row id - used as the migration_source_links key. */
sourceId: number;
fullName: string;
email: string | null;
phoneE164: string | null;
phoneCountry: CountryCode | null;
placeOfResidence: string | null;
placeOfResidenceCountryIso: CountryCode | null;
source: string | null;
notes: string | null;
dateFirstContact: string | null;
}
export interface MigrationFlag { export interface MigrationFlag {
sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions'; sourceTable: 'interests' | 'residential_interests' | 'website_interest_submissions';
sourceId: number; sourceId: number;
@@ -92,6 +149,10 @@ export interface MigrationFlag {
export interface MigrationPlan { export interface MigrationPlan {
clients: PlannedClient[]; clients: PlannedClient[];
interests: PlannedInterest[]; interests: PlannedInterest[];
/** EOI documents derived from interest rows with a `documensoID`. */
documents: PlannedDocument[];
/** Residential leads - physically separate domain, simple 1:1 mapping. */
residentialClients: PlannedResidentialClient[];
flags: MigrationFlag[]; flags: MigrationFlag[];
/** Pairs that the migration would auto-link (high score). */ /** Pairs that the migration would auto-link (high score). */
autoLinks: Array<{ autoLinks: Array<{
@@ -113,6 +174,9 @@ export interface MigrationStats {
outputInterests: number; outputInterests: number;
outputContacts: number; outputContacts: number;
outputAddresses: number; outputAddresses: number;
outputDocuments: number;
outputDocumentSigners: number;
outputResidentialClients: number;
flaggedRows: number; flaggedRows: number;
autoLinkedClusters: number; autoLinkedClusters: number;
needsReviewPairs: number; needsReviewPairs: number;
@@ -193,7 +257,7 @@ function parseFlexibleDate(input: unknown): string | null {
/** /**
* Run the full transform pipeline against a NocoDB snapshot. Pure * Run the full transform pipeline against a NocoDB snapshot. Pure
* function same input always produces the same plan. * function - same input always produces the same plan.
*/ */
export function transformSnapshot( export function transformSnapshot(
snapshot: NocoDbSnapshot, snapshot: NocoDbSnapshot,
@@ -214,6 +278,7 @@ export function transformSnapshot(
// Build the planned clients + interests from the clusters. // Build the planned clients + interests from the clusters.
const clients: PlannedClient[] = []; const clients: PlannedClient[] = [];
const interests: PlannedInterest[] = []; const interests: PlannedInterest[] = [];
const documents: PlannedDocument[] = [];
const autoLinks: MigrationPlan['autoLinks'] = []; const autoLinks: MigrationPlan['autoLinks'] = [];
const needsReview: MigrationPlan['needsReview'] = []; const needsReview: MigrationPlan['needsReview'] = [];
@@ -226,10 +291,15 @@ export function transformSnapshot(
const planned = buildPlannedClient(tempId, cluster, opts); const planned = buildPlannedClient(tempId, cluster, opts);
clients.push(planned); clients.push(planned);
// Each row in the cluster becomes its own interest record. // Each row in the cluster becomes its own interest record. If the
// legacy row carried a documensoID, also emit an EOI document so the
// /documents view in the new CRM mirrors the legacy signing state.
for (const member of cluster.members) { for (const member of cluster.members) {
const interest = buildPlannedInterest(member.row, tempId); const interest = buildPlannedInterest(member.row, tempId);
interests.push(interest); interests.push(interest);
const doc = buildPlannedDocument(member.row, tempId, planned.fullName);
if (doc) documents.push(doc);
} }
if (cluster.members.length > 1) { if (cluster.members.length > 1) {
@@ -246,9 +316,17 @@ export function transformSnapshot(
} }
} }
// Residential leads - separate domain, no dedup needed (different team
// sees different rows). One PlannedResidentialClient per source row.
const residentialClients: PlannedResidentialClient[] = snapshot.residentialInterests
.map((row) => buildPlannedResidentialClient(row, opts, flags))
.filter((r): r is PlannedResidentialClient => r !== null);
return { return {
clients, clients,
interests, interests,
documents,
residentialClients,
flags, flags,
autoLinks, autoLinks,
needsReview, needsReview,
@@ -259,6 +337,9 @@ export function transformSnapshot(
outputInterests: interests.length, outputInterests: interests.length,
outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0), outputContacts: clients.reduce((sum, c) => sum + c.contacts.length, 0),
outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0), outputAddresses: clients.reduce((sum, c) => sum + c.addresses.length, 0),
outputDocuments: documents.length,
outputDocumentSigners: documents.reduce((sum, d) => sum + d.signers.length, 0),
outputResidentialClients: residentialClients.length,
flaggedRows: flags.length, flaggedRows: flags.length,
autoLinkedClusters: autoLinks.length, autoLinkedClusters: autoLinks.length,
needsReviewPairs: needsReview.length, needsReviewPairs: needsReview.length,
@@ -359,7 +440,7 @@ interface Cluster {
function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] { function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[] {
// Use a union-find structure indexed by row id. Every pair with a // Use a union-find structure indexed by row id. Every pair with a
// score >= autoLink threshold gets unioned. Pairs in [needsReview, // score >= autoLink threshold gets unioned. Pairs in [needsReview,
// autoLink) accumulate onto the cluster's reviewPairs list they're // autoLink) accumulate onto the cluster's reviewPairs list - they're
// surfaced for human triage but not auto-merged. // surfaced for human triage but not auto-merged.
const parent = new Map<string, string>(); const parent = new Map<string, string>();
for (const r of rows) parent.set(r.candidate.id, r.candidate.id); for (const r of rows) parent.set(r.candidate.id, r.candidate.id);
@@ -404,7 +485,7 @@ function clusterByDedup(rows: RowCandidate[], opts: TransformOptions): Cluster[]
} }
clusterReasons.set(root, existing); clusterReasons.set(root, existing);
} else if (m.score >= opts.thresholds.needsReview) { } else if (m.score >= opts.thresholds.needsReview) {
// Medium track on whichever cluster `left` belongs to. // Medium - track on whichever cluster `left` belongs to.
const root = find(left.candidate.id); const root = find(left.candidate.id);
const list = clusterReviewPairs.get(root) ?? []; const list = clusterReviewPairs.get(root) ?? [];
list.push({ list.push({
@@ -473,7 +554,7 @@ function buildPlannedClient(
): PlannedClient { ): PlannedClient {
const lead = cluster.leadCandidate; const lead = cluster.leadCandidate;
// Collect distinct emails + phones from across the cluster duplicate // Collect distinct emails + phones from across the cluster - duplicate
// submissions often come with different contact methods we want to // submissions often come with different contact methods we want to
// preserve as multiple rows in `client_contacts`. // preserve as multiple rows in `client_contacts`.
const seenEmails = new Set<string>(); const seenEmails = new Set<string>();
@@ -574,3 +655,210 @@ function buildPlannedInterest(row: NocoDbRow, clientTempId: string): PlannedInte
documensoId: (row['documensoID'] as string | undefined) ?? null, documensoId: (row['documensoID'] as string | undefined) ?? null,
}; };
} }
// ─── EOI document builder ───────────────────────────────────────────────────
/** Map the legacy `EOI Status` SingleSelect onto the new
 * documents.status enum.
 *
 * Explicit operator-set values win: "Signed" → completed, "Waiting for
 * Signatures" → partially_signed. "Awaiting Further Details" means the
 * operator is blocked on the client, so stale sign-time fields from
 * earlier signing rounds must NOT promote the document — only the send
 * timestamp distinguishes sent vs draft in that branch. Sign-time
 * fields are trusted solely when the enum is blank (rows that pre-date
 * the EOI Status column). */
function mapDocumentStatus(row: NocoDbRow): PlannedDocument['status'] {
  const explicitStatus = (row['EOI Status'] as string | undefined)?.trim();

  switch (explicitStatus) {
    case 'Signed':
      return 'completed';
    case 'Waiting for Signatures':
      return 'partially_signed';
    case 'Awaiting Further Details':
      // Operator explicitly parked the document - ignore sign-time
      // fields, which may be stale from an earlier signing round.
      return row['EOI Time Sent'] ? 'sent' : 'draft';
    default:
      break;
  }

  // No explicit enum value: infer from sign-time fields.
  // `all_signed_notified_at` / `developerSignTime` are strong signals
  // the document hit the completed lifecycle on pre-enum rows.
  if (row['all_signed_notified_at'] || row['developerSignTime']) {
    return 'completed';
  }
  if (row['clientSignTime']) {
    return 'partially_signed';
  }
  return row['EOI Time Sent'] ? 'sent' : 'draft';
}
/**
* Emit an EOI document plan if the legacy interest row carries a
* `documensoID`. Returns null otherwise - interests without a documensoID
* never had an EOI sent, so there's nothing to migrate.
*/
function buildPlannedDocument(
row: NocoDbRow,
clientTempId: string,
clientFullName: string,
): PlannedDocument | null {
const documensoId = (row['documensoID'] as string | undefined)?.trim();
if (!documensoId) return null;
const status = mapDocumentStatus(row);
const dateSent = parseFlexibleDate(row['EOI Time Sent']);
// Build signers from the three legacy slots. Each slot has its own
// status field (sign-time present = signed). The signing/embedded URLs
// are preserved verbatim so the legacy resume-signing links still work
// for in-flight documents.
const signers: PlannedDocumentSigner[] = [];
const clientEmail = ((row['Email Address'] as string | undefined) ?? '').trim();
if (clientEmail) {
const clientSignedAt = parseFlexibleDate(
row['clientSignTime'] ?? row['client_signed_notified_at'],
);
signers.push({
signerName: clientFullName,
signerEmail: clientEmail,
signerRole: 'client',
signingOrder: 1,
status: clientSignedAt ? 'signed' : 'pending',
signedAt: clientSignedAt,
signingUrl: (row['Signature Link Client'] as string | undefined) ?? null,
embeddedUrl: (row['EmbeddedSignatureLinkClient'] as string | undefined) ?? null,
});
}
const ccLink = (row['Signature Link CC'] as string | undefined) ?? null;
const ccEmbedded = (row['EmbeddedSignatureLinkCC'] as string | undefined) ?? null;
const ccSignedAt = parseFlexibleDate(row['ccSignTime']);
if (ccLink || ccEmbedded || ccSignedAt) {
// Legacy didn't store the CC's email separately - leave a placeholder
// and let the operator update via the UI. Keeping the row preserves
// the link history.
signers.push({
signerName: 'CC (legacy migration)',
signerEmail: 'cc-unknown@migration.local',
signerRole: 'cc',
signingOrder: 2,
status: ccSignedAt ? 'signed' : 'pending',
signedAt: ccSignedAt,
signingUrl: ccLink,
embeddedUrl: ccEmbedded,
});
}
const devSignedAt = parseFlexibleDate(
row['developerSignTime'] ?? row['developer_signed_notified_at'],
);
const devLink = (row['Signature Link Developer'] as string | undefined) ?? null;
const devEmbedded = (row['EmbeddedSignatureLinkDeveloper'] as string | undefined) ?? null;
if (devLink || devEmbedded || devSignedAt) {
signers.push({
signerName: 'Developer (legacy migration)',
signerEmail: 'developer-unknown@migration.local',
signerRole: 'developer',
signingOrder: 3,
status: devSignedAt ? 'signed' : 'pending',
signedAt: devSignedAt,
signingUrl: devLink,
embeddedUrl: devEmbedded,
});
}
// Guard: an EOI document with zero signers leaves the document UI in an
// inconsistent state (status=completed but no rows in document_signers
// means the "who signed" view has nothing to show). Skip the document
// entirely rather than emit an orphaned record. This happens only when
// the legacy row carries a documensoID but lacks an Email Address AND
// has no CC/developer signature data at all - rare, but possible on
// very old rows. The flag is added so the migration report surfaces it
// for human review.
if (signers.length === 0) {
return null;
}
// Stash legacy S3 paths in notes so the reference isn't lost - copying
// attachments into the new files table is a separate workflow.
const notesParts: string[] = [];
const s3Path = (row['S3_Documenso_Path'] as string | undefined)?.trim();
const clientPath = (row['Client_EOI_Document_Path'] as string | undefined)?.trim();
if (s3Path) notesParts.push(`Legacy S3: ${s3Path}`);
if (clientPath) notesParts.push(`Legacy client copy: ${clientPath}`);
notesParts.push(`Migrated from legacy NocoDB Interests row #${row.Id}`);
return {
sourceId: row.Id,
clientTempId,
documentType: 'eoi',
title: `EOI - ${clientFullName}`,
status,
documensoId,
notes: notesParts.join('\n'),
signers,
dateSent,
};
}
// ─── Residential builder ────────────────────────────────────────────────────
function buildPlannedResidentialClient(
row: NocoDbRow,
opts: TransformOptions,
flags: MigrationFlag[],
): PlannedResidentialClient | null {
const rawName = (row['Full Name'] as string | undefined) ?? '';
const rawEmail = (row['Email Address'] as string | undefined) ?? '';
const rawPhone = (row['Phone Number'] as string | undefined) ?? '';
const rawCountry = (row['Place of Residence'] as string | undefined) ?? '';
const normName = normalizeName(rawName);
if (!normName.display) {
flags.push({
sourceTable: 'residential_interests',
sourceId: row.Id,
reason: 'residential row has no name - skipped',
});
return null;
}
const email = normalizeEmail(rawEmail);
const country = resolveCountry(rawCountry);
const phoneCountry = country.iso ?? opts.defaultPhoneCountry;
const phoneResult = normalizePhone(rawPhone, phoneCountry as CountryCode);
if (rawPhone && !phoneResult?.e164) {
flags.push({
sourceTable: 'residential_interests',
sourceId: row.Id,
reason: phoneResult?.flagged ? `phone ${phoneResult.flagged}` : 'phone unparseable',
details: { rawPhone },
});
}
if (rawEmail && !email) {
flags.push({
sourceTable: 'residential_interests',
sourceId: row.Id,
reason: 'email invalid',
details: { rawEmail },
});
}
const sourceFromRow = (row['Source'] as string | undefined) ?? null;
const mappedSource = sourceFromRow ? (SOURCE_MAP[sourceFromRow] ?? 'manual') : null;
const extraComments = (row['Extra Comments'] as string | undefined)?.trim() ?? null;
return {
sourceId: row.Id,
fullName: normName.display,
email,
phoneE164: phoneResult?.e164 ?? null,
phoneCountry: phoneResult?.country ?? null,
placeOfResidence: rawCountry.trim() || null,
placeOfResidenceCountryIso: country.iso ?? null,
source: mappedSource,
notes: extraComments,
dateFirstContact: parseFlexibleDate(
row['Time Created'] ?? row['CreatedAt'] ?? row['Created At'],
),
};
}

View File

@@ -7,7 +7,7 @@
* *
* Auth: `xc-token` header per NocoDB v2 API. * Auth: `xc-token` header per NocoDB v2 API.
* *
* The shape returned is a verbatim record of the row's fields caller * The shape returned is a verbatim record of the row's fields - caller
* is responsible for mapping to the new schema via `nocodb-transform.ts`. * is responsible for mapping to the new schema via `nocodb-transform.ts`.
*/ */
@@ -34,7 +34,7 @@ export function loadNocoDbConfig(env: NodeJS.ProcessEnv = process.env): NocoDbCo
// ─── Table identifiers ────────────────────────────────────────────────────── // ─── Table identifiers ──────────────────────────────────────────────────────
// //
// These IDs are stable per the NocoDB base they were captured during the // These IDs are stable per the NocoDB base - they were captured during the
// 2026-05-03 audit and won't change unless the base is rebuilt. If the // 2026-05-03 audit and won't change unless the base is rebuilt. If the
// base is reset, regenerate them from `getTablesList`. // base is reset, regenerate them from `getTablesList`.
export const NOCO_TABLES = { export const NOCO_TABLES = {
@@ -67,7 +67,7 @@ export type NocoDbRow = Record<string, unknown> & { Id: number };
/** /**
* Fetch all rows from a NocoDB table. Auto-paginates until the API * Fetch all rows from a NocoDB table. Auto-paginates until the API
* reports `isLastPage`. The legacy base is small (252 Interests rows * reports `isLastPage`. The legacy base is small (252 Interests rows
* being the largest table) so we keep this simple no streaming. * being the largest table) so we keep this simple - no streaming.
*/ */
export async function fetchAllRows( export async function fetchAllRows(
tableId: string, tableId: string,
@@ -95,7 +95,7 @@ export async function fetchAllRows(
if (!res.ok) { if (!res.ok) {
throw new Error( throw new Error(
`NocoDB fetch failed: ${res.status} ${res.statusText} table ${tableId} page ${page}`, `NocoDB fetch failed: ${res.status} ${res.statusText} - table ${tableId} page ${page}`,
); );
} }
@@ -110,7 +110,7 @@ export async function fetchAllRows(
} }
/** /**
* Convenience snapshot pulls every table the migration cares about * Convenience snapshot - pulls every table the migration cares about
* in parallel. Returned shape is the input the transform layer expects. * in parallel. Returned shape is the input the transform layer expects.
*/ */
export interface NocoDbSnapshot { export interface NocoDbSnapshot {

View File

@@ -48,7 +48,7 @@ const PARTICLES: ReadonlySet<string> = new Set([
export interface NormalizedName { export interface NormalizedName {
/** Human-readable form preserved for UI display. Trims, collapses /** Human-readable form preserved for UI display. Trims, collapses
* whitespace, fixes case, but never destroys the user's intent * whitespace, fixes case, but never destroys the user's intent -
* slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC") * slash-with-company structure ("Daniel Wainstein / 7 Knots, LLC")
* is left intact. */ * is left intact. */
display: string; display: string;
@@ -67,7 +67,7 @@ export interface NormalizedName {
* *
* If the input contains a `/` (slash-with-company structure like * If the input contains a `/` (slash-with-company structure like
* "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is * "Daniel Wainstein / 7 Knots, LLC"), the trailing company text is
* preserved verbatim it's signal, not noise. * preserved verbatim - it's signal, not noise.
*/ */
export function normalizeName(raw: string | null | undefined): NormalizedName { export function normalizeName(raw: string | null | undefined): NormalizedName {
const safe = (raw ?? '').toString(); const safe = (raw ?? '').toString();
@@ -111,7 +111,7 @@ function titleCaseOneToken(token: string, isFirst: boolean): string {
if (!token) return ''; if (!token) return '';
const lower = token.toLowerCase(); const lower = token.toLowerCase();
if (!isFirst && PARTICLES.has(lower)) return lower; if (!isFirst && PARTICLES.has(lower)) return lower;
// O'Brien / D'Angelo / l'Estrange capitalize the segment after each // O'Brien / D'Angelo / l'Estrange - capitalize the segment after each
// apostrophe so a lowercased input round-trips to readable Irish caps. // apostrophe so a lowercased input round-trips to readable Irish caps.
if (lower.includes("'")) { if (lower.includes("'")) {
return lower return lower
@@ -144,7 +144,7 @@ const emailSchema = z.string().email();
/** /**
* Normalize a free-text email. Trims + lowercases. Returns null for empty * Normalize a free-text email. Trims + lowercases. Returns null for empty
* or malformed input caller decides whether to flag, store, or drop. * or malformed input - caller decides whether to flag, store, or drop.
* *
* Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real * Plus-aliases (`user+tag@domain.com`) are NOT stripped: they're real
* distinct addresses, and stripping them would auto-merge legitimately * distinct addresses, and stripping them would auto-merge legitimately
@@ -182,10 +182,10 @@ export interface NormalizedPhone {
* 1. strip leading apostrophe (spreadsheet copy-paste artifact) * 1. strip leading apostrophe (spreadsheet copy-paste artifact)
* 2. strip \r / \n / \t (real values seen in NocoDB had carriage returns) * 2. strip \r / \n / \t (real values seen in NocoDB had carriage returns)
* 3. detect multi-number fields ("+33611111111;+33622222222", * 3. detect multi-number fields ("+33611111111;+33622222222",
* "0677580750/0690511494") flag and take first segment * "0677580750/0690511494") - flag and take first segment
* 4. strip whitespace, dots, dashes, parens, single quotes * 4. strip whitespace, dots, dashes, parens, single quotes
* 5. convert leading "00" → "+" (international dialling code) * 5. convert leading "00" → "+" (international dialling code)
* 6. detect placeholder fakes (8+ consecutive zeros) flag, return null e164 * 6. detect placeholder fakes (8+ consecutive zeros) - flag, return null e164
* 7. parse via libphonenumber-js * 7. parse via libphonenumber-js
* 8. on parse failure or invalid number → flag 'unparseable' * 8. on parse failure or invalid number → flag 'unparseable'
* *
@@ -205,7 +205,7 @@ export function normalizePhone(
// 2. Strip carriage returns / newlines / tabs. // 2. Strip carriage returns / newlines / tabs.
cleaned = cleaned.replace(/[\r\n\t]/g, ''); cleaned = cleaned.replace(/[\r\n\t]/g, '');
// 3. Multi-number detection split on /, ;, , (in that order of priority). // 3. Multi-number detection - split on /, ;, , (in that order of priority).
let flagged: PhoneFlag | undefined; let flagged: PhoneFlag | undefined;
if (/[/;,]/.test(cleaned)) { if (/[/;,]/.test(cleaned)) {
flagged = 'multi_number'; flagged = 'multi_number';
@@ -221,7 +221,7 @@ export function normalizePhone(
cleaned = '+' + cleaned.slice(2); cleaned = '+' + cleaned.slice(2);
} }
// 6. Placeholder fakes runs of 8+ consecutive zeros, e.g. +447000000000. // 6. Placeholder fakes - runs of 8+ consecutive zeros, e.g. +447000000000.
if (/0{8,}/.test(cleaned)) { if (/0{8,}/.test(cleaned)) {
return { e164: null, country: null, display: null, flagged: 'placeholder' }; return { e164: null, country: null, display: null, flagged: 'placeholder' };
} }
@@ -229,7 +229,7 @@ export function normalizePhone(
// 7. Parse via the existing i18n helper (libphonenumber-js under the hood). // 7. Parse via the existing i18n helper (libphonenumber-js under the hood).
const parsed = parsePhone(cleaned, defaultCountry); const parsed = parsePhone(cleaned, defaultCountry);
if (!parsed.e164) { if (!parsed.e164) {
// Couldn't even produce a canonical form genuinely garbage. // Couldn't even produce a canonical form - genuinely garbage.
return { e164: null, country: null, display: null, flagged: 'unparseable' }; return { e164: null, country: null, display: null, flagged: 'unparseable' };
} }
@@ -255,7 +255,7 @@ export function normalizePhone(
* `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized * `Intl.DisplayNames(en)` output verbatim. Keys are pre-normalized
* (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace). * (lowercase, diacritic-free, hyphens/dots → spaces, collapsed whitespace).
* *
* Kept opinionated and small only entries we've actually seen in legacy * Kept opinionated and small - only entries we've actually seen in legacy
* data. Adding a new alias is cheap; trying to be exhaustive isn't. * data. Adding a new alias is cheap; trying to be exhaustive isn't.
*/ */
const COUNTRY_ALIASES: Record<string, CountryCode> = { const COUNTRY_ALIASES: Record<string, CountryCode> = {
@@ -327,7 +327,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
const normalized = normalizeForLookup(text.toString()); const normalized = normalizeForLookup(text.toString());
if (!normalized) return { iso: null, confidence: null }; if (!normalized) return { iso: null, confidence: null };
// 1. Aliases covers USA / UK / St Barth and friends. // 1. Aliases - covers USA / UK / St Barth and friends.
const alias = COUNTRY_ALIASES[normalized]; const alias = COUNTRY_ALIASES[normalized];
if (alias) return { iso: alias, confidence: 'exact' }; if (alias) return { iso: alias, confidence: 'exact' };
@@ -348,7 +348,7 @@ export function resolveCountry(text: string | null | undefined): ResolvedCountry
// 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because // 4. Fuzzy fallback (Levenshtein ≤ 2). Skipped for short inputs because
// a 4-char string like "Mars" sits within distance 2 of multiple // a 4-char string like "Mars" sits within distance 2 of multiple
// short country names (Mali, Laos, Iran, …) false-positive city. // short country names (Mali, Laos, Iran, …) - false-positive city.
if (normalized.length >= 6) { if (normalized.length >= 6) {
let bestCode: CountryCode | null = null; let bestCode: CountryCode | null = null;
let bestDistance = Number.POSITIVE_INFINITY; let bestDistance = Number.POSITIVE_INFINITY;
@@ -387,7 +387,7 @@ function normalizeForLookup(s: string): string {
/** /**
* Standard iterative Levenshtein. Used by the country fuzzy match and by * Standard iterative Levenshtein. Used by the country fuzzy match and by
* the dedup algorithm's name-similarity rule. Allocates O(n*m) so callers * the dedup algorithm's name-similarity rule. Allocates O(n*m) so callers
* shouldn't run it against pathological inputs the dedup blocking * shouldn't run it against pathological inputs - the dedup blocking
* strategy keeps comparison sets small. * strategy keeps comparison sets small.
* *
* Exported so the find-matches module can reuse the same implementation * Exported so the find-matches module can reuse the same implementation
@@ -400,7 +400,7 @@ export function levenshtein(a: string, b: string): number {
const m = a.length; const m = a.length;
const n = b.length; const n = b.length;
// Two rolling rows is enough keeps memory at O(n) instead of O(n*m). // Two rolling rows is enough - keeps memory at O(n) instead of O(n*m).
let prev = new Array<number>(n + 1); let prev = new Array<number>(n + 1);
let curr = new Array<number>(n + 1); let curr = new Array<number>(n + 1);
for (let j = 0; j <= n; j += 1) prev[j] = j; for (let j = 0; j <= n; j += 1) prev[j] = j;

View File

@@ -211,3 +211,238 @@ describe('transformSnapshot — fixture regression', () => {
expect(a.autoLinks.length).toBe(b.autoLinks.length); expect(a.autoLinks.length).toBe(b.autoLinks.length);
}); });
}); });
// ─── EOI document derivation ───────────────────────────────────────────────
describe('transformSnapshot — EOI document derivation', () => {
  /** Snapshot scaffold with every table empty - tests fill in what they need. */
  function blankSnapshot(): NocoDbSnapshot {
    return {
      fetchedAt: '2026-05-04T00:00:00.000Z',
      berths: [],
      residentialInterests: [],
      websiteInterestSubmissions: [],
      websiteContactFormSubmissions: [],
      websiteBerthEoiSupplements: [],
      interests: [],
    };
  }

  /**
   * Snapshot containing a single legacy interest row that mimics an EOI
   * in flight: Documenso ID, contact details, plus whatever the caller
   * overrides (signing slots, status enum, S3 path, …).
   */
  function snapshotWithEoi(
    overrides: Partial<NocoDbRow> & { Id: number; documensoID: string },
  ): NocoDbSnapshot {
    const snapshot = blankSnapshot();
    snapshot.interests = [
      row({
        'Full Name': 'Reza Amjad',
        'Email Address': 'reza@example.com',
        'Phone Number': '+15551112222',
        'Sales Process Level': 'Signed EOI and NDA',
        ...overrides,
      }),
    ];
    return snapshot;
  }

  it('emits no PlannedDocument when documensoID is absent', () => {
    const snapshot = blankSnapshot();
    snapshot.interests = [
      row({
        Id: 100,
        'Full Name': 'No EOI',
        'Email Address': 'no-eoi@example.com',
        'Sales Process Level': 'General Qualified Interest',
      }),
    ];
    const plan = transformSnapshot(snapshot);
    expect(plan.documents).toHaveLength(0);
    expect(plan.stats.outputDocuments).toBe(0);
  });

  it('emits one PlannedDocument per interest row with documensoID', () => {
    const plan = transformSnapshot(
      snapshotWithEoi({
        Id: 720,
        documensoID: '107',
        'EOI Status': 'Signed',
        'EOI Time Sent': '2026-04-08T18:07:39.582Z',
        clientSignTime: '2026-04-08T19:00:00.000Z',
        developerSignTime: '2026-04-08T19:23:49.227Z',
        'Signature Link Client': 'https://documenso.example/sign/abc',
        S3_Documenso_Path: 'EOIs/Reza_Amjad_EOI_NDA_signed.pdf',
      }),
    );
    expect(plan.documents).toHaveLength(1);
    expect(plan.stats.outputDocuments).toBe(1);

    const doc = plan.documents[0]!;
    expect(doc.documensoId).toBe('107');
    expect(doc.status).toBe('completed'); // EOI Status = "Signed"
    expect(doc.documentType).toBe('eoi');
    expect(doc.title).toBe('EOI - Reza Amjad');
    expect(doc.notes).toContain('Legacy S3: EOIs/Reza_Amjad_EOI_NDA_signed.pdf');
    expect(doc.notes).toContain('Migrated from legacy NocoDB Interests row #720');

    const clientSigner = doc.signers.find((s) => s.signerRole === 'client');
    const devSigner = doc.signers.find((s) => s.signerRole === 'developer');
    expect(clientSigner?.signerEmail).toBe('reza@example.com');
    expect(clientSigner?.status).toBe('signed');
    expect(clientSigner?.signingUrl).toBe('https://documenso.example/sign/abc');
    expect(devSigner?.status).toBe('signed');
  });

  it('infers status=partially_signed when EOI Status missing but client has signed', () => {
    // No EOI Status, no developer sign — only the client signature is set.
    const plan = transformSnapshot(
      snapshotWithEoi({
        Id: 800,
        documensoID: '200',
        clientSignTime: '2026-04-01T12:00:00.000Z',
      }),
    );
    expect(plan.documents[0]!.status).toBe('partially_signed');
  });

  it('infers status=sent when EOI Time Sent present but no signatures yet', () => {
    const plan = transformSnapshot(
      snapshotWithEoi({
        Id: 801,
        documensoID: '201',
        'EOI Time Sent': '2026-04-23T03:43:14.593Z',
      }),
    );
    expect(plan.documents[0]!.status).toBe('sent');
  });

  it('preserves the parent interest sourceId on the document so apply can stitch', () => {
    const plan = transformSnapshot(
      snapshotWithEoi({ Id: 720, documensoID: '107', 'EOI Status': 'Signed' }),
    );
    const doc = plan.documents[0]!;
    expect(doc.sourceId).toBe(720);
    expect(doc.clientTempId).toBe('client-720');
  });

  it('skips the CC slot when the legacy row has no CC signature data', () => {
    // No Signature Link CC, no ccSignTime, no Embedded CC.
    const plan = transformSnapshot(
      snapshotWithEoi({
        Id: 720,
        documensoID: '107',
        'EOI Status': 'Signed',
      }),
    );
    const ccSigner = plan.documents[0]!.signers.find((s) => s.signerRole === 'cc');
    expect(ccSigner).toBeUndefined();
  });
});
// ─── Flexible date parsing ─────────────────────────────────────────────────
describe('parseFlexibleDate format handling', () => {
// The legacy NocoDB base mixes ISO datetimes with manual DD-MM-YYYY
// entries from the Caribbean marina office. parseFlexibleDate handles
// both. Locking the disambiguation rule with explicit assertions
// because a regression here would silently shift dates by months,
// which is exactly the kind of bug nobody notices until much later.
it('parses unambiguous ISO datetimes verbatim', () => {
const plan = transformSnapshot({
fetchedAt: '2026-05-04T00:00:00.000Z',
berths: [],
residentialInterests: [],
websiteInterestSubmissions: [],
websiteContactFormSubmissions: [],
websiteBerthEoiSupplements: [],
interests: [
row({
Id: 1,
'Full Name': 'ISO Test',
'Email Address': 'iso@example.com',
'EOI Time Sent': '2026-04-08T18:07:39.582Z',
}),
],
});
expect(plan.interests[0]!.dateEoiSent).toBe('2026-04-08T18:07:39.582Z');
});
it('parses 01-02-2025 as Feb 1 (DD-MM-YYYY), not Jan 2 (MM-DD-YYYY)', () => {
// The Caribbean office uses day-first dates. Feb 1 = 01-02-2025.
// If a regression flips this to MM-DD parsing, the migration would
// mis-stamp every manually-entered date by ~30 days.
const plan = transformSnapshot({
fetchedAt: '2026-05-04T00:00:00.000Z',
berths: [],
residentialInterests: [],
websiteInterestSubmissions: [],
websiteContactFormSubmissions: [],
websiteBerthEoiSupplements: [],
interests: [
row({
Id: 2,
'Full Name': 'DDMM Test',
'Email Address': 'ddmm@example.com',
'Time LOI Sent': '01-02-2025',
}),
],
});
const iso = plan.interests[0]!.dateContractSent;
expect(iso?.startsWith('2025-02-01')).toBe(true);
});
});
describe('transformSnapshot — residential leads', () => {
it('produces one PlannedResidentialClient per source row', () => {
const plan = transformSnapshot({
fetchedAt: '2026-05-04T00:00:00.000Z',
berths: [],
websiteInterestSubmissions: [],
websiteContactFormSubmissions: [],
websiteBerthEoiSupplements: [],
interests: [],
residentialInterests: [
row({
Id: 6,
'Full Name': 'FABIO GOMEZ',
'Email Address': 'fabio@example.com',
'Phone Number': '+19143371482',
'Place of Residence': 'USA',
}),
row({
Id: 7,
'Full Name': 'James Wilkinson',
'Email Address': 'jcw@example.com',
'Phone Number': '+12485684256',
}),
],
});
expect(plan.residentialClients).toHaveLength(2);
expect(plan.stats.outputResidentialClients).toBe(2);
const fabio = plan.residentialClients.find((c) => c.sourceId === 6);
expect(fabio?.fullName).toBe('Fabio Gomez');
expect(fabio?.email).toBe('fabio@example.com');
expect(fabio?.phoneE164).toBe('+19143371482');
expect(fabio?.placeOfResidenceCountryIso).toBe('US');
});
it('skips residential rows with no name', () => {
const plan = transformSnapshot({
fetchedAt: '2026-05-04T00:00:00.000Z',
berths: [],
websiteInterestSubmissions: [],
websiteContactFormSubmissions: [],
websiteBerthEoiSupplements: [],
interests: [],
residentialInterests: [row({ Id: 100, 'Full Name': '', 'Email Address': 'x@y.com' })],
});
expect(plan.residentialClients).toHaveLength(0);
expect(plan.flags.some((f) => f.sourceId === 100)).toBe(true);
});
});