/**
 * Pure helpers for the organized-bucket document importer
 * (`scripts/import-organized-documents.ts`).
 *
 * The script walks an existing storage prefix that already represents real
 * organisation (e.g. `legacy-imports/Deals 2026/Q1/contract.pdf`) and
 * materialises matching `document_folders` + `documents` rows in the CRM
 * without rewriting the storage keys. Splitting these helpers out of the
 * script body makes the path-parser unit-testable in isolation.
 */

export interface ParsedImportPath {
  /** Folder names from outermost to innermost; empty when the file is at the prefix root. */
  folderSegments: string[];
  /** Filename only, never empty. */
  filename: string;
}

/**
 * Decompose a storage key into folder segments + filename relative to the
 * importer prefix. Both `prefix` and `key` use POSIX separators (the
 * filesystem backend's `listByPrefix` already normalises Windows paths).
 *
 * Edge cases:
 *   - Trailing slashes on prefix are tolerated (`legacy/` ≡ `legacy`).
 *   - An empty prefix (or one consisting only of slashes) disables the
 *     prefix check; the whole key is parsed.
 *   - Empty intermediate segments (`a//b`) collapse to `[a, b]`.
 *   - Leading-prefix mismatch throws — the caller should never feed in keys
 *     outside the prefix it asked the backend to list.
 *   - A key that ends in `/` (directory placeholder) names no file and
 *     throws — the caller must filter those out before invoking. Without
 *     this check the last folder name would silently be reported as the
 *     filename.
 *
 * @param prefix - Importer prefix the key was listed under; may be empty.
 * @param key - Full storage key to decompose.
 * @returns Folder segments (outermost first) and the non-empty filename.
 * @throws Error if `key` is not under `prefix`, if nothing remains after
 *   stripping the prefix, or if `key` ends in `/`.
 */
export function parseImportPath(prefix: string, key: string): ParsedImportPath {
  const normalizedPrefix = prefix.replace(/\/+$/, '');

  let relative = key;
  if (normalizedPrefix.length > 0) {
    // Require the separator after the prefix so `legacy` does not match `legacy-old/x`.
    if (!key.startsWith(`${normalizedPrefix}/`)) {
      throw new Error(`Key "${key}" is not under prefix "${prefix}"`);
    }
    relative = key.slice(normalizedPrefix.length + 1);
  }

  // Dropping empty segments collapses repeated separators (`a//b` -> `a`, `b`).
  const parts = relative.split('/').filter((segment) => segment.length > 0);

  const filename = parts.pop();
  if (filename === undefined) {
    throw new Error(`Key "${key}" has no filename after stripping prefix`);
  }

  // The filter above also discards the empty trailing segment of `a/b/`,
  // so a directory placeholder has to be detected on the unsplit string.
  if (relative.endsWith('/')) {
    throw new Error(`Key "${key}" is a directory placeholder (ends with "/"), not a file`);
  }

  return { folderSegments: parts, filename };
}