Files
pn-new-crm/scripts/backfill-document-folders.ts

247 lines
8.8 KiB
TypeScript
Raw Permalink Normal View History

/**
* Idempotent backfill: ensure every port has the three system roots
* (Clients / Companies / Yachts), every entity with attached files
* has a per-entity subfolder, every file with entity FKs has
* `folder_id` set, and every signed file from a completed workflow
* has the workflow's owning-entity FK (client, else company, else
* yacht) propagated onto it.
*
* Safe to re-run: all writes target only rows where the relevant
* column is NULL. Per-port `pg_advisory_xact_lock` serializes
* concurrent runs.
*
* Usage:
* pnpm tsx scripts/backfill-document-folders.ts
* pnpm tsx scripts/backfill-document-folders.ts --port <portId>
*/
import 'dotenv/config';
import { and, eq, isNotNull, isNull, sql } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { files, documents } from '@/lib/db/schema/documents';
import {
ensureSystemRoots,
ensureEntityFolder,
type EntityType,
} from '@/lib/services/document-folders.service';
import { logger } from '@/lib/logger';
/**
* Options accepted by `runBackfill`. Both fields are optional; calling
* `runBackfill()` with no argument processes every port.
*/
export interface BackfillOptions {
/**
* When provided, only backfill this port. Otherwise all ports.
* The id is used as-is; `runBackfill` does not check that a matching
* row exists in `ports`.
*/
portId?: string;
/**
* User ID recorded in `created_by` for any folders created.
* `runBackfill` substitutes `'system-backfill'` when this is omitted.
*/
systemUserId?: string;
}
/**
* Per-port counters surfaced through the return value so the CLI can
* print them and operators (or follow-up scripts) can sanity-check that
* a re-run shrinks each number toward zero.
*/
export interface PortBackfillStats {
/** Port these counters belong to. */
portId: string;
/**
* Total files inspected at Step 3 (with `folderId IS NULL`). This
* includes files that carry no client/company/yacht FK; those are
* counted here but skipped, so they never receive a folder.
*/
filesProcessed: number;
/** Files updated with `folder_id` set in Step 3. */
filesWithFolderIdSet: number;
/**
* Number of distinct folder ids returned by `ensureEntityFolder` during
* Step 3. This is an upper bound on folders actually created: a folder
* that already existed before the run is counted as well.
*/
foldersCreated: number;
/** Signed-file rows that had an entity FK written onto them in Step 2. */
fksPropagated: number;
}
/** Owning entity resolved from a row's `clientId` / `companyId` / `yachtId` columns. */
type EntityOwner = { type: EntityType; id: string };

/**
 * Picks the single entity a row is attributed to.
 * Precedence is client, then company, then yacht; returns `null` when none is set.
 */
function resolveEntityOwner(row: {
  clientId: string | null;
  companyId: string | null;
  yachtId: string | null;
}): EntityOwner | null {
  if (row.clientId) return { type: 'client', id: row.clientId };
  if (row.companyId) return { type: 'company', id: row.companyId };
  if (row.yachtId) return { type: 'yacht', id: row.yachtId };
  return null;
}

/**
 * One-time idempotent backfill. For each port, inside one transaction:
 *
 *  1. ensures the system root folders exist (`ensureSystemRoots`);
 *  2. copies the owning-entity FK of every completed document onto its
 *     signed file, but only where the file's matching FK is still NULL;
 *  3. assigns `folder_id` to every file that has an entity FK and no folder,
 *     resolving the per-entity folder through `ensureEntityFolder`.
 *
 * @param opts Optional port filter and the user id to attribute created folders to.
 * @returns One stats record per processed port, in processing order.
 * @throws Rejects when any statement on a port's transaction fails. That port's
 *   transaction is rolled back and the remaining ports are not processed.
 *   Failures of `ensureEntityFolder` are logged and skipped instead.
 */
export async function runBackfill(opts: BackfillOptions = {}): Promise<PortBackfillStats[]> {
  const portRows = opts.portId
    ? [{ id: opts.portId }]
    : await db.select({ id: ports.id }).from(ports);
  const systemUser = opts.systemUserId ?? 'system-backfill';
  const allStats: PortBackfillStats[] = [];

  for (const { id: portId } of portRows) {
    const stats: PortBackfillStats = {
      portId,
      filesProcessed: 0,
      filesWithFolderIdSet: 0,
      foldersCreated: 0,
      fksPropagated: 0,
    };

    await db.transaction(async (tx) => {
      // Serialize concurrent runs on a per-port lock so two simultaneous
      // backfills can't race on folder inserts. The lock is released when
      // this transaction ends.
      await tx.execute(sql`SELECT pg_advisory_xact_lock(hashtext(${portId})::bigint)`);

      // ── Step 1: Ensure system roots exist for this port ──────────────────
      // NOTE(review): `ensureSystemRoots` / `ensureEntityFolder` are not given
      // `tx`, so they presumably run on their own connection and commit
      // independently of this transaction — confirm against the service.
      await ensureSystemRoots(portId, systemUser);

      // ── Step 2: Propagate entity FKs from completed workflows onto their
      //            signed file rows (pre-auto-deposit legacy completions). ──
      const completedDocs = await tx
        .select({
          id: documents.id,
          signedFileId: documents.signedFileId,
          clientId: documents.clientId,
          companyId: documents.companyId,
          yachtId: documents.yachtId,
        })
        .from(documents)
        .where(
          and(
            eq(documents.portId, portId),
            eq(documents.status, 'completed'),
            isNotNull(documents.signedFileId),
          ),
        );

      for (const doc of completedDocs) {
        // The query already excludes NULLs; this guard narrows the type.
        if (!doc.signedFileId) continue;
        const owner = resolveEntityOwner(doc);
        if (!owner) continue;

        // Build the update object with ONLY the matching FK column so we
        // never pass column references to .set().
        const update =
          owner.type === 'client'
            ? { clientId: owner.id }
            : owner.type === 'company'
              ? { companyId: owner.id }
              : { yachtId: owner.id };
        const matchingFkColumn =
          owner.type === 'client'
            ? files.clientId
            : owner.type === 'company'
              ? files.companyId
              : files.yachtId;

        // Only fill the FK when it is still NULL, so re-runs are no-ops.
        const propagated = await tx
          .update(files)
          .set(update)
          .where(
            and(eq(files.id, doc.signedFileId), eq(files.portId, portId), isNull(matchingFkColumn)),
          )
          .returning({ id: files.id });
        stats.fksPropagated += propagated.length;
      }

      // ── Step 3: For every file with entity FKs but no folder_id,
      //            create the entity subfolder and set folder_id. ──────────
      // Only the columns needed below are fetched.
      const fileRows = await tx
        .select({
          id: files.id,
          clientId: files.clientId,
          companyId: files.companyId,
          yachtId: files.yachtId,
        })
        .from(files)
        .where(and(eq(files.portId, portId), isNull(files.folderId)));
      stats.filesProcessed = fileRows.length;

      // Folder id per "<type>:<entityId>", so each entity's folder is resolved
      // at most once per run instead of once per file. Assumes
      // `ensureEntityFolder` returns the same folder for the same entity.
      const folderIdByOwner = new Map<string, string>();

      for (const file of fileRows) {
        const owner = resolveEntityOwner(file);
        if (!owner) continue;

        const ownerKey = `${owner.type}:${owner.id}`;
        let folderId = folderIdByOwner.get(ownerKey);
        if (folderId === undefined) {
          try {
            const folder = await ensureEntityFolder(portId, owner.type, owner.id, systemUser);
            folderId = folder.id;
            folderIdByOwner.set(ownerKey, folderId);
          } catch (err) {
            // Best-effort: log and skip rather than abort the whole port.
            // Failures are not cached, so the next file of the same entity retries.
            logger.warn({ err, fileId: file.id, portId }, 'backfill: ensureEntityFolder failed');
            continue;
          }
        }

        // Deliberately outside the try/catch: once a statement on `tx` fails,
        // PostgreSQL rejects every later statement in that transaction, so
        // swallowing the error would only hide a rollback behind "success" stats.
        // The IS NULL guard keeps the write idempotent and avoids overwriting a
        // folder assigned by someone else after the SELECT above.
        const updated = await tx
          .update(files)
          .set({ folderId })
          .where(and(eq(files.id, file.id), eq(files.portId, portId), isNull(files.folderId)))
          .returning({ id: files.id });
        stats.filesWithFolderIdSet += updated.length;
      }

      // Distinct folders touched this run; pre-existing folders are included,
      // so this is an upper bound on folders actually created.
      stats.foldersCreated = new Set(folderIdByOwner.values()).size;
    });

    logger.info(
      {
        portId,
        filesProcessed: stats.filesProcessed,
        filesWithFolderIdSet: stats.filesWithFolderIdSet,
        foldersCreated: stats.foldersCreated,
        fksPropagated: stats.fksPropagated,
      },
      'backfill: port complete',
    );
    allStats.push(stats);
  }

  return allStats;
}
// ── CLI entry point ────────────────────────────────────────────────────────────
// Runs only when this file is executed directly (`require.main === module`,
// which is the guard that applies to tsx's CJS output). Importing the module,
// as the test suite does to reach `runBackfill`, skips this block entirely.
if (require.main === module) {
  // Optional `--port <portId>`: the value is the argument right after the flag.
  const argv = process.argv;
  const flagIndex = argv.indexOf('--port');
  const requestedPort = flagIndex === -1 ? undefined : argv[flagIndex + 1];

  // A flag with nothing after it, or followed by another flag, is a usage error.
  if (flagIndex !== -1 && (!requestedPort || requestedPort.startsWith('--'))) {
    logger.error('--port requires a value');
    process.exit(1);
  }

  void (async () => {
    try {
      const stats = await runBackfill({ portId: requestedPort });

      console.log('\nBackfill complete.');
      console.log('Per-port summary:');
      for (const row of stats) {
        console.log(
          ` port=${row.portId}: filesProcessed=${row.filesProcessed} filesWithFolderIdSet=${row.filesWithFolderIdSet} foldersCreated=${row.foldersCreated} fksPropagated=${row.fksPropagated}`,
        );
      }

      // Sum every counter across ports for the closing totals line.
      const totals = stats.reduce(
        (acc, row) => ({
          files: acc.files + row.filesProcessed,
          filesSet: acc.filesSet + row.filesWithFolderIdSet,
          folders: acc.folders + row.foldersCreated,
          fks: acc.fks + row.fksPropagated,
        }),
        { files: 0, filesSet: 0, folders: 0, fks: 0 },
      );
      console.log(
        `Totals: ports=${stats.length} filesProcessed=${totals.files} filesWithFolderIdSet=${totals.filesSet} foldersCreated=${totals.folders} fksPropagated=${totals.fks}`,
      );

      process.exit(0);
    } catch (err) {
      // Any failure in the backfill or while printing ends with a non-zero exit.
      logger.error({ err }, 'Backfill failed');
      process.exit(1);
    }
  })();
}