Files
pn-new-crm/scripts/backfill-legacy-lead-source.ts

136 lines
4.6 KiB
TypeScript
Raw Permalink Normal View History

/**
* One-shot: backfill `interests.source` for legacy NocoDB-imported rows.
*
* Why this exists: the legacy NocoDB Interests table left the `Source`
* column null for ~95 % of rows. The migration mapped null null, so the
* Lead Source Attribution chart shows them as "Unspecified". Per the
* operator's best knowledge, almost all of those legacy rows came in
* through the website (web form / portal) the few that didn't are the
* ones that already carry an explicit `Source` value (Form / portal /
* External). Defaulting null 'website' is therefore the closest
* truth we can reconstruct without per-row sales notes review.
*
* Idempotent: only updates rows where `source IS NULL` AND the row has a
* `migration_source_links` entry tying it back to the legacy NocoDB import,
* so net-new manually-created interests with null source aren't touched.
*
* Usage:
* pnpm tsx scripts/backfill-legacy-lead-source.ts --port-slug port-nimara [--dry-run]
*/
import 'dotenv/config';
import { eq, and, isNull, inArray } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { interests } from '@/lib/db/schema/interests';
import { migrationSourceLinks } from '@/lib/db/schema/migration';
interface CliArgs {
portSlug: string | null;
dryRun: boolean;
}
function parseArgs(argv: string[]): CliArgs {
const args: CliArgs = { portSlug: null, dryRun: false };
for (let i = 0; i < argv.length; i += 1) {
const a = argv[i]!;
if (a === '--port-slug') args.portSlug = argv[++i] ?? null;
else if (a === '--dry-run') args.dryRun = true;
else if (a === '-h' || a === '--help') {
console.log(
'Usage: pnpm tsx scripts/backfill-legacy-lead-source.ts --port-slug <slug> [--dry-run]',
);
process.exit(0);
}
}
if (!args.portSlug) {
console.error('Missing required --port-slug');
process.exit(1);
}
return args;
}
async function main() {
const args = parseArgs(process.argv.slice(2));
const [port] = await db
.select({ id: ports.id, name: ports.name })
.from(ports)
.where(eq(ports.slug, args.portSlug!))
.limit(1);
if (!port) {
console.error(`No port found with slug "${args.portSlug}"`);
process.exit(1);
}
console.log(`[backfill] target: ${port.name} (${port.id})`);
// Pull every interest id this port owns that has a NULL source.
const candidateInterests = await db
.select({ id: interests.id })
.from(interests)
.where(and(eq(interests.portId, port.id), isNull(interests.source)));
console.log(`[backfill] interests with NULL source in this port: ${candidateInterests.length}`);
if (candidateInterests.length === 0) {
console.log('Nothing to backfill.');
return;
}
// Filter to ONLY those that came in via the legacy migration — preserves
// null on net-new rows where the operator hasn't picked a source yet.
const candidateIds = candidateInterests.map((r) => r.id);
const legacyLinks = await db
.select({ targetEntityId: migrationSourceLinks.targetEntityId })
.from(migrationSourceLinks)
.where(
and(
eq(migrationSourceLinks.sourceSystem, 'nocodb_interests'),
eq(migrationSourceLinks.targetEntityType, 'interest'),
inArray(migrationSourceLinks.targetEntityId, candidateIds),
),
);
const legacyIds = new Set(legacyLinks.map((l) => l.targetEntityId));
const toUpdate = candidateIds.filter((id) => legacyIds.has(id));
console.log(
`[backfill] of those, ${toUpdate.length} are legacy migration rows (will set source='website')`,
);
console.log(
`[backfill] ${candidateInterests.length - toUpdate.length} are net-new rows (left untouched)`,
);
if (args.dryRun) {
console.log('[backfill] --dry-run set; no writes.');
return;
}
if (toUpdate.length === 0) {
console.log('Nothing to write.');
return;
}
// Update in chunks of 500 to keep query size sane.
const CHUNK = 500;
let updated = 0;
for (let i = 0; i < toUpdate.length; i += CHUNK) {
const chunk = toUpdate.slice(i, i + CHUNK);
// Belt-and-suspenders: re-assert `source IS NULL` in the WHERE so
// a concurrent process that set source on one of these rows
// between SELECT and UPDATE doesn't get its value clobbered.
const result = await db
.update(interests)
.set({ source: 'website' })
.where(and(inArray(interests.id, chunk), isNull(interests.source)))
.returning({ id: interests.id });
updated += result.length;
}
console.log(`[backfill] updated ${updated} rows.`);
}
main().catch((err) => {
console.error('FATAL', err);
process.exit(1);
});