/**
 * Idempotent backfill: ensure every port has the three system roots
 * (Clients / Companies / Yachts), every entity with attached files
 * has a per-entity subfolder, every file with entity FKs has
 * `folder_id` set, and every signed file from a completed workflow
 * has the workflow's entity FKs propagated onto it.
 *
 * Safe to re-run: all writes target only rows where the relevant
 * column is NULL. Per-port `pg_advisory_xact_lock` serializes
 * concurrent runs.
 *
 * Usage:
 *   pnpm tsx scripts/backfill-document-folders.ts
 *   pnpm tsx scripts/backfill-document-folders.ts --port <port-id>
 */
import 'dotenv/config';

import { and, eq, isNotNull, isNull, sql } from 'drizzle-orm';

import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { files, documents } from '@/lib/db/schema/documents';
import {
  ensureSystemRoots,
  ensureEntityFolder,
  type EntityType,
} from '@/lib/services/document-folders.service';
import { logger } from '@/lib/logger';

export interface BackfillOptions {
  /** When provided, only backfill this port. Otherwise all ports. */
  portId?: string;
  /**
   * User ID recorded in `created_by` for any folders created.
   * Defaults to `'system-backfill'` when omitted.
   */
  systemUserId?: string;
}

/**
 * Per-port counters surfaced through the return value so the CLI can
 * print them and operators (or follow-up scripts) can sanity-check that
 * a re-run shrinks each number toward zero.
 */
export interface PortBackfillStats {
  portId: string;
  /**
   * Total files inspected at Step 3 (with `folderId IS NULL`), including
   * files that carry no entity FK and are therefore skipped.
   */
  filesProcessed: number;
  /** Files whose `folder_id` was actually written in Step 3. */
  filesWithFolderIdSet: number;
  /**
   * Distinct folder ids returned by `ensureEntityFolder` during Step 3.
   * This is an upper bound on folders newly created: a folder that
   * already existed before the run is counted too.
   */
  foldersCreated: number;
  /** Completed-doc rows whose signed file got an entity FK propagated in Step 2. */
  fksPropagated: number;
}

/** The entity FK columns read from both `documents` and `files` rows. */
interface EntityFkRow {
  clientId: string | null;
  companyId: string | null;
  yachtId: string | null;
}

/** The single entity a row is attributed to for foldering purposes. */
interface EntityOwner {
  type: EntityType;
  id: string;
}

/**
 * Picks the owning entity of a row. Precedence is client, then company,
 * then yacht; returns `null` when none of the three FKs is set.
 */
function resolveOwner(row: EntityFkRow): EntityOwner | null {
  if (row.clientId) return { type: 'client', id: row.clientId };
  if (row.companyId) return { type: 'company', id: row.companyId };
  if (row.yachtId) return { type: 'yacht', id: row.yachtId };
  return null;
}

/**
 * One-time idempotent backfill. See module-level JSDoc for full
 * description of what each step does.
 *
 * Ports are processed sequentially, each inside its own transaction.
 *
 * @param opts Optional port filter and the user id to attribute created folders to.
 * @returns One stats entry per processed port, in processing order.
 */
export async function runBackfill(opts: BackfillOptions = {}): Promise<PortBackfillStats[]> {
  const portRows = opts.portId
    ? [{ id: opts.portId }]
    : await db.select({ id: ports.id }).from(ports);
  const systemUser = opts.systemUserId ?? 'system-backfill';
  const allStats: PortBackfillStats[] = [];

  for (const { id: portId } of portRows) {
    const stats: PortBackfillStats = {
      portId,
      filesProcessed: 0,
      filesWithFolderIdSet: 0,
      foldersCreated: 0,
      fksPropagated: 0,
    };

    await db.transaction(async (tx) => {
      // Serialize concurrent runs on a per-port lock so two simultaneous
      // backfills can't race on folder inserts.
      await tx.execute(sql`SELECT pg_advisory_xact_lock(hashtext(${portId})::bigint)`);

      // ── Step 1: Ensure system roots exist for this port ──────────────────
      // NOTE(review): ensureSystemRoots / ensureEntityFolder are not handed
      // `tx`; whether their writes join this transaction cannot be told
      // from this file — confirm against the service.
      await ensureSystemRoots(portId, systemUser);

      // ── Step 2: Propagate entity FKs from completed workflows onto their
      //            signed file rows (pre-auto-deposit legacy completions). ──
      const completedDocs = await tx
        .select({
          id: documents.id,
          signedFileId: documents.signedFileId,
          clientId: documents.clientId,
          companyId: documents.companyId,
          yachtId: documents.yachtId,
        })
        .from(documents)
        .where(
          and(
            eq(documents.portId, portId),
            eq(documents.status, 'completed'),
            isNotNull(documents.signedFileId),
          ),
        );

      for (const d of completedDocs) {
        // The query already filters on NOT NULL; this narrows the type.
        if (!d.signedFileId) continue;

        const owner = resolveOwner(d);
        if (!owner) continue;

        // Build the update object with ONLY the matching FK column so we
        // never pass column references to .set() (Drizzle syntax bug fix).
        const update =
          owner.type === 'client'
            ? { clientId: owner.id }
            : owner.type === 'company'
              ? { companyId: owner.id }
              : { yachtId: owner.id };
        const matchingFkColumn =
          owner.type === 'client'
            ? files.clientId
            : owner.type === 'company'
              ? files.companyId
              : files.yachtId;

        // Only fill the FK when it is still NULL, so re-runs are no-ops.
        const propagated = await tx
          .update(files)
          .set(update)
          .where(
            and(eq(files.id, d.signedFileId), eq(files.portId, portId), isNull(matchingFkColumn)),
          )
          .returning({ id: files.id });
        stats.fksPropagated += propagated.length;
      }

      // ── Step 3: For every file with entity FKs but no folder_id,
      //            create the entity subfolder and set folder_id. ──────────
      const unfiledRows = await tx
        .select()
        .from(files)
        .where(and(eq(files.portId, portId), isNull(files.folderId)));
      stats.filesProcessed = unfiledRows.length;

      // Distinct folders handed back by ensureEntityFolder in this run.
      const ensuredFolderIds = new Set<string>();

      for (const f of unfiledRows) {
        const owner = resolveOwner(f);
        if (!owner) continue;

        // Tracks which operation was in flight so the warning below does
        // not misattribute an UPDATE failure to ensureEntityFolder.
        let stage: 'ensure-folder' | 'set-folder-id' = 'ensure-folder';
        try {
          const folder = await ensureEntityFolder(portId, owner.type, owner.id, systemUser);
          ensuredFolderIds.add(folder.id);

          stage = 'set-folder-id';
          // Run the write in a nested transaction (savepoint): if it fails,
          // only this file's write is rolled back and the enclosing port
          // transaction stays usable, so "log and skip" really is a skip.
          const updated = await tx.transaction(async (sp) => {
            return await sp
              .update(files)
              .set({ folderId: folder.id })
              .where(
                and(
                  eq(files.id, f.id),
                  eq(files.portId, portId),
                  // Never overwrite a folder_id set since the SELECT above.
                  isNull(files.folderId),
                ),
              )
              .returning({ id: files.id });
          });
          stats.filesWithFolderIdSet += updated.length;
        } catch (err) {
          // Best-effort: log and skip rather than abort the whole port.
          logger.warn({ err, fileId: f.id, portId, stage }, 'backfill: ensureEntityFolder failed');
        }
      }

      stats.foldersCreated = ensuredFolderIds.size;
    });

    logger.info(
      {
        portId,
        filesProcessed: stats.filesProcessed,
        filesWithFolderIdSet: stats.filesWithFolderIdSet,
        foldersCreated: stats.foldersCreated,
        fksPropagated: stats.fksPropagated,
      },
      'backfill: port complete',
    );
    allStats.push(stats);
  }

  return allStats;
}

/**
 * CLI driver: parses an optional `--port <port-id>` argument, runs the
 * backfill, and prints a per-port summary followed by totals.
 * Exits with status 1 when `--port` is given without a value.
 */
async function main(): Promise<void> {
  const flagIndex = process.argv.indexOf('--port');
  let portId: string | undefined;
  if (flagIndex !== -1) {
    const next = process.argv[flagIndex + 1];
    // A following `--flag` means the value was omitted, not that the
    // port id happens to start with dashes.
    if (!next || next.startsWith('--')) {
      logger.error('--port requires a value');
      process.exit(1);
    }
    portId = next;
  }

  const stats = await runBackfill({ portId });

  console.log('\nBackfill complete.');
  console.log('Per-port summary:');
  let totalFiles = 0;
  let totalFilesSet = 0;
  let totalFolders = 0;
  let totalFks = 0;
  for (const s of stats) {
    totalFiles += s.filesProcessed;
    totalFilesSet += s.filesWithFolderIdSet;
    totalFolders += s.foldersCreated;
    totalFks += s.fksPropagated;
    console.log(
      ` port=${s.portId}: filesProcessed=${s.filesProcessed} ` +
        `filesWithFolderIdSet=${s.filesWithFolderIdSet} ` +
        `foldersCreated=${s.foldersCreated} fksPropagated=${s.fksPropagated}`,
    );
  }
  console.log(
    `Totals: ports=${stats.length} filesProcessed=${totalFiles} ` +
      `filesWithFolderIdSet=${totalFilesSet} foldersCreated=${totalFolders} ` +
      `fksPropagated=${totalFks}`,
  );
}

// ── CLI entry point ────────────────────────────────────────────────────────────
// tsx compiles TypeScript to CJS at runtime, so `require.main === module`
// is the standard guard. The test suite imports `runBackfill` as a named
// export; the CLI invocation hits this block and runs main().
if (require.main === module) {
  main()
    .then(() => {
      process.exit(0);
    })
    .catch((err: unknown) => {
      logger.error({ err }, 'Backfill failed');
      process.exit(1);
    });
}