Files
pn-new-crm/scripts/import-organized-documents.ts
Matt 955911302b fix(folders): logging, files-rescue, hard-delete wiring, audit logs
- A6: logger import + warn calls in document-folders.service.ts
- G-C1: re-parent files (not just documents) in deleteFolderSoftRescue
- A4: importer sets files.folder_id (was only setting documents.folder_id)
- A7 + G-C3: demote system folder + nullify scratchpadNotes in client-hard-delete
- Defense-in-depth portId on folder-move UPDATE
- Audit logs for createFolder, syncEntityFolderName, archive/restore suffix
- portId in companies/yachts archive log context
- Row-count telemetry in backfill CLI

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 13:57:42 +02:00

327 lines
9.7 KiB
TypeScript

/**
* Importer for an organized S3 / filesystem bucket whose folder structure
* already represents real organisation. Walks every key under `--bucket-prefix`,
* builds matching `document_folders` rows mirroring the path, then inserts
* `documents` + `files` rows pointing at the existing storage keys verbatim
* — no path rewrite. Use when migrating from a legacy MinIO bucket whose
* tree is the source of truth.
*
* Usage:
* pnpm tsx scripts/import-organized-documents.ts --port-slug <slug> \
* --bucket-prefix "legacy-imports/" --dry-run
* pnpm tsx scripts/import-organized-documents.ts --port-slug <slug> \
* --bucket-prefix "legacy-imports/" --apply
*
* Idempotency:
* - Folders: sibling-name unique index swallows duplicate creates and we
* reuse the existing row.
* - Documents: skipped when a `files` row with the same
* `(portId, storagePath)` already exists — the storage key is the natural
* identity for this importer.
*/
import 'dotenv/config';
import path from 'node:path';
import { and, eq } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { documents, documentFolders, files } from '@/lib/db/schema/documents';
import { user } from '@/lib/db/schema/users';
import { createAuditLog } from '@/lib/audit';
import { ConflictError } from '@/lib/errors';
import { createFolder } from '@/lib/services/document-folders.service';
import { parseImportPath } from '@/lib/services/document-import';
import { getStorageBackend } from '@/lib/storage';
/** Parsed command-line options for the importer, as produced by `parseArgs`. */
interface CliArgs {
/** Value of `--port-slug`; `main` looks the target port up by this slug. */
portSlug: string;
/** Value of `--bucket-prefix`; passed to the storage backend's `listByPrefix`. */
bucketPrefix: string;
/** Set by `--dry-run`: print the plan and stop without writing. */
dryRun: boolean;
/** Set by `--apply`: perform the import. Mutually exclusive with `dryRun`. */
apply: boolean;
/** Value of `--uploaded-by`, or `null` when the flag was not given. */
uploadedByUserId: string | null;
}
/**
 * Parses the importer's command-line arguments.
 *
 * Recognised flags: `--port-slug <slug>`, `--bucket-prefix <prefix>`,
 * `--uploaded-by <userId>`, `--dry-run`, `--apply`, and `-h` / `--help`.
 *
 * Terminates the process (exit code 1) when an unknown argument is seen, when
 * a value-taking flag has no value, when `--port-slug` is missing, or when
 * `--dry-run` / `--apply` are both absent or both present. `--help` prints the
 * usage text and exits with code 0.
 *
 * @param argv Arguments after the script name (i.e. `process.argv.slice(2)`).
 * @returns The populated options object.
 */
function parseArgs(argv: string[]): CliArgs {
  const args: CliArgs = {
    portSlug: '',
    bucketPrefix: '',
    dryRun: false,
    apply: false,
    uploadedByUserId: null,
  };

  // Reads the value that follows a value-taking flag. A missing value, or a
  // following token that is itself a `--flag`, is rejected up front: silently
  // defaulting here would turn a typo into "import the whole bucket" or
  // "attribute everything to an arbitrary user".
  const takeValue = (flag: string, index: number): string => {
    const value = argv[index];
    if (value === undefined || value.startsWith('--')) {
      console.error(`Missing value for ${flag}`);
      return process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i]!;
    if (a === '--port-slug') args.portSlug = takeValue(a, ++i);
    else if (a === '--bucket-prefix') args.bucketPrefix = takeValue(a, ++i);
    else if (a === '--uploaded-by') args.uploadedByUserId = takeValue(a, ++i);
    else if (a === '--dry-run') args.dryRun = true;
    else if (a === '--apply') args.apply = true;
    else if (a === '-h' || a === '--help') {
      printHelp();
      process.exit(0);
    } else {
      console.error(`Unknown argument: ${a}`);
      printHelp();
      process.exit(1);
    }
  }

  if (!args.portSlug) {
    console.error('Missing required --port-slug');
    process.exit(1);
  }
  if (!args.dryRun && !args.apply) {
    console.error('Must specify either --dry-run or --apply.');
    process.exit(1);
  }
  if (args.dryRun && args.apply) {
    console.error('--dry-run and --apply are mutually exclusive.');
    process.exit(1);
  }
  return args;
}
/** Writes the command-line usage summary to stdout as a single log call. */
function printHelp(): void {
  const usageLines = [
    'Usage:',
    'pnpm tsx scripts/import-organized-documents.ts \\',
    '--port-slug <slug> \\',
    '--bucket-prefix <prefix> \\',
    '(--dry-run | --apply) \\',
    '[--uploaded-by <userId>]',
    // Trailing empty entry keeps the final newline of the usage text.
    '',
  ];
  console.log(usageLines.join('\n'));
}
/** One storage key discovered under the bucket prefix, plus what `main` plans to do with it. */
interface PlannedDoc {
/** Storage key exactly as returned by the backend listing; stored verbatim as the file's storage path. */
key: string;
/** Folder path segments returned by `parseImportPath` for this key; empty means no folder. */
folderSegments: string[];
/** Filename returned by `parseImportPath` for this key. */
filename: string;
/** Size reported by the backend `head` call, or `null` when it reported none. */
bytes: number | null;
/** Content type from the backend `head` call, else guessed from the filename extension. */
contentType: string;
/** True when a `files` row with this port and storage path already exists. */
alreadyImported: boolean;
}
/** Lower-cased file extension (including the leading dot) → MIME type. */
const CONTENT_TYPE_BY_EXT: Record<string, string> = Object.fromEntries([
  ['.pdf', 'application/pdf'],
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.gif', 'image/gif'],
  ['.webp', 'image/webp'],
  ['.txt', 'text/plain'],
  ['.csv', 'text/csv'],
  ['.doc', 'application/msword'],
  ['.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  ['.xls', 'application/vnd.ms-excel'],
  ['.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
]);

/**
 * Derives a MIME type from a filename's extension (case-insensitive).
 *
 * @param filename File name or path whose extension is inspected.
 * @returns The mapped MIME type, or `application/octet-stream` when the
 *   extension is absent or not in the table.
 */
function guessContentType(filename: string): string {
  const extension = path.extname(filename).toLowerCase();
  const known = CONTENT_TYPE_BY_EXT[extension];
  if (known === undefined || known === null) {
    return 'application/octet-stream';
  }
  return known;
}
/**
 * CLI entry point.
 *
 * 1. Parses arguments and resolves the target port by slug.
 * 2. Resolves the acting user (`--uploaded-by`, else the first user row found).
 * 3. Lists every storage key under the bucket prefix and builds a plan,
 *    marking keys that already have a `files` row for this port.
 * 4. Prints the plan; with `--dry-run` it stops here.
 * 5. With `--apply`, ensures the folder chain for each new key and inserts a
 *    `files` row plus a `documents` row in one transaction, then records an
 *    audit log entry for the created document.
 *
 * Exits the process with code 1 when the port cannot be found or when no user
 * is available to attribute the import to.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));

  const port = await db.query.ports.findFirst({ where: eq(ports.slug, args.portSlug) });
  if (!port) {
    console.error(`Port not found: ${args.portSlug}`);
    process.exit(1);
  }

  let uploadedById = args.uploadedByUserId;
  if (!uploadedById) {
    const [u] = await db.select({ id: user.id }).from(user).limit(1);
    if (!u) {
      console.error(
        'No user rows exist; pass --uploaded-by <userId> or seed at least one user before running.',
      );
      process.exit(1);
    }
    uploadedById = u.id;
    console.log(`No --uploaded-by provided; falling back to first user: ${uploadedById}`);
  }

  const backend = await getStorageBackend();
  console.log(`Listing keys under prefix "${args.bucketPrefix}" via ${backend.name} backend …`);
  const keys = await backend.listByPrefix(args.bucketPrefix);
  console.log(`Found ${keys.length} candidate keys.`);

  const plan: PlannedDoc[] = [];
  for (const key of keys) {
    const parsed = parseImportPath(args.bucketPrefix, key);
    // Keys without a filename component are skipped.
    if (!parsed.filename) continue;
    const head = await backend.head(key);
    // The storage key is the identity used for idempotency across re-runs.
    const existing = await db.query.files.findFirst({
      where: and(eq(files.portId, port.id), eq(files.storagePath, key)),
      columns: { id: true },
    });
    plan.push({
      key,
      folderSegments: parsed.folderSegments,
      filename: parsed.filename,
      bytes: head?.sizeBytes ?? null,
      contentType: head?.contentType ?? guessContentType(parsed.filename),
      alreadyImported: !!existing,
    });
  }

  printPlan(plan);
  if (args.dryRun) {
    console.log('\nDry-run complete. No changes written.');
    return;
  }

  // Maps "a/b/c" folder paths to folder ids so each folder is resolved once.
  const folderIdByPath = new Map<string, string | null>();
  folderIdByPath.set('', null);
  let createdCount = 0;
  let skippedCount = 0;

  for (const entry of plan) {
    if (entry.alreadyImported) {
      skippedCount += 1;
      continue;
    }
    const folderId = await ensureFolderChain(
      port.id,
      uploadedById,
      entry.folderSegments,
      folderIdByPath,
    );

    // Insert the file and its document atomically; the transaction resolves
    // with the new document id once both rows are committed.
    const documentId = await db.transaction(async (tx) => {
      const [fileRow] = await tx
        .insert(files)
        .values({
          portId: port.id,
          filename: entry.filename,
          originalName: entry.filename,
          mimeType: entry.contentType,
          sizeBytes: entry.bytes !== null ? String(entry.bytes) : null,
          storagePath: entry.key,
          uploadedBy: uploadedById,
          category: 'misc',
          folderId,
        })
        .returning();
      if (!fileRow) {
        throw new Error(`Insert into files returned no row for ${entry.key}`);
      }
      const [docRow] = await tx
        .insert(documents)
        .values({
          portId: port.id,
          documentType: 'other',
          title: entry.filename,
          createdBy: uploadedById,
          folderId,
          fileId: fileRow.id,
          status: 'completed',
          isManualUpload: true,
        })
        .returning();
      if (!docRow) {
        throw new Error(`Insert into documents returned no row for ${entry.key}`);
      }
      return docRow.id;
    });

    // Audit only after the commit, and wait for it: this script calls
    // process.exit as soon as main() resolves, so an un-awaited write could be
    // abandoned, and a write issued before commit could describe a row that
    // was rolled back. Auditing stays best-effort — a failure is reported but
    // does not abort the import.
    try {
      await createAuditLog({
        userId: uploadedById,
        portId: port.id,
        action: 'create',
        entityType: 'document',
        entityId: documentId,
        metadata: {
          source: 'organized-bucket-importer',
          storageKey: entry.key,
          folderSegments: entry.folderSegments,
        },
      });
    } catch (err) {
      console.warn(`Audit log failed for ${entry.key}:`, err);
    }

    createdCount += 1;
    console.log(`✓ Imported ${entry.key}`);
  }

  console.log(
    `\nDone. Created ${createdCount} documents, skipped ${skippedCount} (already imported).`,
  );
}
/**
 * Resolves the folder id for a path, creating or looking up each level in
 * order from the outermost segment inwards.
 *
 * @param portId Port that owns the folders.
 * @param userId User passed through to folder creation.
 * @param segments Folder names from outermost to innermost.
 * @param cache Path ("a/b/c") → folder id map; read before each lookup and
 *   updated with every level resolved here.
 * @returns The id of the innermost folder, or `null` when `segments` is empty.
 */
async function ensureFolderChain(
  portId: string,
  userId: string,
  segments: string[],
  cache: Map<string, string | null>,
): Promise<string | null> {
  let currentId: string | null = null;
  const trail: string[] = [];
  for (const segment of segments) {
    trail.push(segment);
    const pathKey = trail.join('/');
    const known = cache.get(pathKey);
    if (known === undefined) {
      // Not seen yet in this run: create (or find) it under the current parent.
      currentId = await createOrFindFolder(portId, userId, segment, currentId);
      cache.set(pathKey, currentId);
    } else {
      currentId = known;
    }
  }
  return currentId;
}
/**
 * Creates a folder under `parentId`, or returns the id of the existing sibling
 * with the same name when creation is rejected with a `ConflictError`.
 *
 * @param portId Port that owns the folder.
 * @param userId User recorded as the creator.
 * @param name Folder name; matched case-insensitively (after trimming) when
 *   falling back to an existing row.
 * @param parentId Parent folder id, or `null` for a root-level folder.
 * @returns The id of the created or pre-existing folder.
 * @throws Any non-conflict error from `createFolder`, or the original
 *   `ConflictError` when no matching sibling can be found afterwards.
 */
async function createOrFindFolder(
  portId: string,
  userId: string,
  name: string,
  parentId: string | null,
): Promise<string> {
  try {
    const created = await createFolder(portId, userId, { name, parentId });
    return created.id;
  } catch (err) {
    if (!(err instanceof ConflictError)) throw err;
    // Sibling-name unique index hit — fetch the existing row so the import
    // remains idempotent across re-runs.
    const wanted = name.trim().toLowerCase();
    // Restrict the lookup to actual siblings in the query itself. Root-level
    // folders are matched with IS NULL rather than loading every folder in
    // the port and filtering afterwards.
    const siblings = await db.query.documentFolders.findMany({
      where: (folder, ops) =>
        ops.and(
          ops.eq(folder.portId, portId),
          parentId ? ops.eq(folder.parentId, parentId) : ops.isNull(folder.parentId),
        ),
    });
    const existing = siblings.find((row) => row.name.toLowerCase() === wanted);
    if (!existing) throw err;
    return existing.id;
  }
}
/**
 * Prints the import plan grouped by folder path (sorted), one line per file,
 * followed by a totals line.
 *
 * Files are marked `+` when they will be imported and `·` when they are
 * already imported; the byte size is appended when known.
 *
 * @param plan Planned documents in discovery order.
 */
function printPlan(plan: PlannedDoc[]): void {
  const byFolder = new Map<string, PlannedDoc[]>();
  let pending = 0;
  for (const doc of plan) {
    const label = doc.folderSegments.join('/') || '(root)';
    const bucket = byFolder.get(label);
    if (bucket === undefined) {
      byFolder.set(label, [doc]);
    } else {
      bucket.push(doc);
    }
    if (!doc.alreadyImported) pending += 1;
  }

  console.log('\nPlan:');
  const labels = [...byFolder.keys()];
  labels.sort();
  for (const label of labels) {
    console.log(` ${label}/`);
    for (const doc of byFolder.get(label) ?? []) {
      const marker = doc.alreadyImported ? '·' : '+';
      const sizeSuffix = doc.bytes === null ? '' : ` (${doc.bytes}B)`;
      console.log(` ${marker} ${doc.filename}${sizeSuffix}`);
    }
  }

  const alreadyThere = plan.length - pending;
  console.log(`\nTotal: ${plan.length} keys → ${pending} new, ${alreadyThere} already imported.`);
}
// Script entry: run the importer, then exit explicitly — code 0 on success,
// code 1 (after logging the error) on any failure.
void (async (): Promise<void> => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
  process.exit(0);
})();