feat(documents): importer for organized S3/filesystem buckets
One-shot script that walks an existing organized bucket tree, builds
matching document_folders rows mirroring the path, then inserts
documents + files rows pointing at the existing storage keys verbatim
— no path rewrite. For migrating from a legacy MinIO bucket whose
folder structure is already the source of truth.
Idempotency:
• Folders: sibling-name unique index swallows duplicate creates;
we reuse the row on ConflictError.
• Documents: skipped when (port_id, fileStoragePath) already exists.
Adds StorageBackend.listByPrefix (recursive readdir on filesystem;
listObjectsV2 stream-drain on s3) — the first one-shot caller, not
a hot path. Pure parseImportPath helper extracted to its own module
and unit-tested for trailing slashes, empty intermediate segments,
prefix mismatch, and special-character folder names (8 tests).
Audit log per imported doc carries source='organized-bucket-importer',
storageKey, and folderSegments so the documents inspector can filter
on imports later.
CLI:
pnpm tsx scripts/import-organized-documents.ts \
--port-slug <slug> \
--bucket-prefix "legacy-imports/" \
(--dry-run | --apply) [--uploaded-by <userId>]
Folds in Prettier post-hook drift on documents.service.ts +
download handler — same lint-staged formatting the earlier commits
already absorbed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -361,6 +361,43 @@ export class FilesystemBackend implements StorageBackend {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Lists every regular file found beneath `prefix` inside the storage root.
 *
 * Trailing slashes on `prefix` are stripped before it is resolved; an empty
 * prefix starts the walk at the storage root itself. Directories are
 * descended into recursively, and entries that are neither directories nor
 * regular files are skipped. A directory that does not exist (ENOENT) simply
 * contributes nothing, so a missing prefix yields `[]`; any other readdir
 * error is rethrown.
 *
 * Intended for one-shot importers rather than request-time code paths.
 *
 * @param prefix Key prefix (directory path relative to the storage root).
 * @returns Keys relative to the storage root, using `/` as the separator,
 *          in default `Array.prototype.sort` order.
 */
async listByPrefix(prefix: string): Promise<string[]> {
  let startDir: string;
  if (prefix) {
    startDir = this.resolveKey(prefix.replace(/\/+$/, ''));
  } else {
    startDir = this.rootResolved;
  }

  // Absolute paths of every regular file discovered during the walk.
  const absoluteFiles: string[] = [];

  const visit = async (dir: string): Promise<void> => {
    let dirents: import('node:fs').Dirent[];
    try {
      dirents = await fs.readdir(dir, { withFileTypes: true });
    } catch (err) {
      // A missing directory is treated as an empty one; everything else bubbles up.
      if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err;
      return;
    }

    for (const dirent of dirents) {
      const fullPath = path.join(dir, dirent.name);
      if (dirent.isDirectory()) {
        await visit(fullPath);
        continue;
      }
      if (dirent.isFile()) {
        absoluteFiles.push(fullPath);
      }
    }
  };

  await visit(startDir);

  // Convert to root-relative keys with POSIX separators, then order them.
  const keys: string[] = [];
  for (const abs of absoluteFiles) {
    const relative = path.relative(this.rootResolved, abs);
    keys.push(relative.split(path.sep).join('/'));
  }
  keys.sort();
  return keys;
}
|
||||
|
||||
/** Used by the proxy route — returns the validated absolute path. */
|
||||
resolveKeyForProxy(key: string): string {
|
||||
return this.resolveKey(key);
|
||||
|
||||
@@ -72,6 +72,15 @@ export interface StorageBackend {
|
||||
/** Generate a short-lived URL the browser can GET from. */
|
||||
presignDownload(key: string, opts: PresignOpts): Promise<{ url: string; expiresAt: Date }>;
|
||||
|
||||
/**
|
||||
* Recursively list keys under `prefix`. Returns the relative key for each
|
||||
* object, sorted alphabetically. Empty prefix means "the entire bucket /
|
||||
* storage root". Used by one-shot importers (e.g. organized-bucket
|
||||
* document import) that need to walk a flat key namespace; not meant for
|
||||
* runtime hot paths.
|
||||
*/
|
||||
listByPrefix(prefix: string): Promise<string[]>;
|
||||
|
||||
readonly name: StorageBackendName;
|
||||
}
|
||||
|
||||
|
||||
@@ -211,6 +211,22 @@ export class S3Backend implements StorageBackend {
|
||||
return { url, expiresAt: new Date(Date.now() + expiry * 1000) };
|
||||
}
|
||||
|
||||
/**
 * Lists the keys of all objects under `prefix` in the configured bucket.
 *
 * Issues a recursive `listObjectsV2` call and drains the resulting stream
 * into an array. Entries without a name, and names ending in `/`
 * (directory-style placeholders), are dropped.
 *
 * Intended for one-shot importers rather than request-time code paths.
 *
 * @param prefix Key prefix passed straight through to `listObjectsV2`.
 * @returns Matching object keys in default `Array.prototype.sort` order.
 */
async listByPrefix(prefix: string): Promise<string[]> {
  const objects = this.client.listObjectsV2(this.bucket, prefix, true) as AsyncIterable<{
    name?: string;
  }>;

  const found: string[] = [];
  for await (const entry of objects) {
    const name = entry.name;
    // Skip nameless entries and "folder" placeholders.
    if (!name || name.endsWith('/')) continue;
    found.push(name);
  }

  found.sort();
  return found;
}
|
||||
|
||||
/** Used by the admin UI's "Test connection" button. */
|
||||
async healthCheck(): Promise<{ ok: true } | { ok: false; error: string }> {
|
||||
const sentinelKey = `_health/${Date.now()}.txt`;
|
||||
|
||||
Reference in New Issue
Block a user