One-shot script that walks an existing organized bucket tree, builds
matching document_folders rows mirroring the path, then inserts
documents + files rows pointing at the existing storage keys verbatim
— no path rewrite. For migrating from a legacy MinIO bucket whose
folder structure is already the source of truth.
Idempotency:
• Folders: sibling-name unique index swallows duplicate creates;
we reuse the row on ConflictError.
• Documents: skipped when (port_id, fileStoragePath) already exists.
Adds StorageBackend.listByPrefix (recursive readdir on filesystem;
listObjectsV2 stream-drain on s3). This importer is its first caller —
a one-shot script, not a hot path. The pure parseImportPath helper is
extracted to its own module and unit-tested for trailing slashes, empty
intermediate segments, prefix mismatch, and special-character folder
names (8 tests).
Audit log per imported doc carries source='organized-bucket-importer',
storageKey, and folderSegments so the documents inspector can filter
on imports later.
CLI:
pnpm tsx scripts/import-organized-documents.ts \
--port-slug <slug> \
--bucket-prefix "legacy-imports/" \
(--dry-run | --apply) [--uploaded-by <userId>]
Folds in Prettier post-hook drift on documents.service.ts +
download handler — same lint-staged formatting the earlier commits
already absorbed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
248 lines
8.8 KiB
TypeScript
248 lines
8.8 KiB
TypeScript
/**
|
|
* Pluggable storage backend (Phase 6a — see docs/berth-recommender-and-pdf-plan.md §4.7a).
|
|
*
|
|
* The CRM stores files (per-berth PDFs, brochures, GDPR exports, etc.) through a
|
|
* single `StorageBackend` abstraction. The deployment chooses between an
|
|
* S3-compatible store (MinIO / AWS S3 / Backblaze B2 / Cloudflare R2 / Wasabi /
|
|
* Tigris) and a local filesystem at runtime via `system_settings.storage_backend`.
|
|
*
|
|
* Callers should always import from this barrel — never from `s3.ts` or
|
|
* `filesystem.ts` directly — so the factory wiring stays the single source of
|
|
* truth.
|
|
*/
|
|
|
|
import { and, eq, isNull } from 'drizzle-orm';
|
|
|
|
import { db } from '@/lib/db';
|
|
import { systemSettings } from '@/lib/db/schema/system';
|
|
import { logger } from '@/lib/logger';
|
|
|
|
import { FilesystemBackend } from './filesystem';
|
|
import { S3Backend } from './s3';
|
|
|
|
/** Identifier of a storage implementation the deployment can select at runtime. */
export type StorageBackendName = 'filesystem' | 's3';
|
|
|
|
/** Options accepted by `StorageBackend.put`. Only `contentType` is mandatory. */
export interface PutOpts {
  /** MIME type recorded for the stored object. */
  contentType: string;

  /** Byte length of the body, for streams that don't expose `.length`; used for capacity pre-flight. */
  sizeBytes?: number;

  /** Pre-computed sha256 hex digest. When omitted, the backend computes one itself. */
  sha256?: string;

  /** Content-Disposition to apply on download (filesystem proxy only). */
  contentDisposition?: string;
}
|
|
|
|
/** Options shared by `presignUpload` and `presignDownload`. Every field is optional. */
export interface PresignOpts {
  /** Lifetime of the URL in seconds. Default 900 (15min) per SECURITY-GUIDELINES §7.1. */
  expirySeconds?: number;

  /** Filename placed in Content-Disposition for downloads. */
  filename?: string;

  contentType?: string;

  /**
   * Port slug the token should be bound to. When set, the filesystem proxy
   * verifier asserts that the storage key starts with `${portSlug}/`. The S3
   * backend ignores it, because presigned S3 URLs carry their own signature
   * scope. Supply it whenever the issuer runs inside a port-scoped request:
   * `generateStorageKey()` already prefixes keys with the slug, and this
   * field is the matching enforcement.
   */
  portSlug?: string;
}
|
|
|
|
/** Operations the rest of the application performs against the active storage backend. */
export interface StorageBackend {
  /** Which implementation this instance is. */
  readonly name: StorageBackendName;

  /** Store `body` under `key`. Resolves with the canonical key, the size in bytes, and the sha256 hex. */
  put(
    key: string,
    body: Buffer | NodeJS.ReadableStream,
    opts: PutOpts,
  ): Promise<{ key: string; sizeBytes: number; sha256: string }>;

  /** Open a read stream for `key`. Throws NotFoundError if the key is missing. */
  get(key: string): Promise<NodeJS.ReadableStream>;

  /** Check existence and size without reading the body. Resolves with `null` when the key is missing. */
  head(key: string): Promise<{ sizeBytes: number; contentType: string } | null>;

  /** Remove `key`. Idempotent — a missing key must not throw. */
  delete(key: string): Promise<void>;

  /** Produce a short-lived URL the browser can upload to. */
  presignUpload(key: string, opts: PresignOpts): Promise<{ url: string; method: 'PUT' | 'POST' }>;

  /** Produce a short-lived URL the browser can GET from. */
  presignDownload(key: string, opts: PresignOpts): Promise<{ url: string; expiresAt: Date }>;

  /**
   * Recursively list the keys under `prefix`, returning each object's
   * relative key in alphabetical order. An empty prefix means "the entire
   * bucket / storage root". Intended for one-shot importers (e.g. the
   * organized-bucket document import) that walk a flat key namespace; not
   * meant for runtime hot paths.
   */
  listByPrefix(prefix: string): Promise<string[]>;
}
|
|
|
|
// ─── factory ────────────────────────────────────────────────────────────────
|
|
|
|
/** A built backend paired with the fingerprint of the configuration it was built from. */
interface CachedFactory {
  /** Captured at cache time so a later settings change can be detected and trigger a rebuild. */
  configFingerprint: string;
  backend: StorageBackend;
}
|
|
|
|
// Per-process memo of the most recently built backend. It is null until
// getStorageBackend() builds one, and again after resetStorageBackendCache().
let cached: CachedFactory | null = null;
|
|
|
|
/**
 * Drop the per-process cached backend so the next `getStorageBackend()` call
 * builds a fresh one.
 *
 * Invoked by the existing pub/sub invalidation hook after `system_settings`
 * writes; also exported so tests can start from a clean slate.
 */
export function resetStorageBackendCache(): void {
  cached = null;
}
|
|
|
|
/**
 * Point-in-time view of the storage-related settings, as assembled by
 * `loadStorageConfig()`. The secret fields hold the stored (encrypted)
 * values as read from settings.
 */
interface StorageConfigSnapshot {
  /** Which backend is selected. */
  backend: StorageBackendName;

  /** Settings read from the `storage_filesystem_root` / `storage_proxy_hmac_secret_encrypted` keys. */
  filesystem?: {
    root?: string;
    proxyHmacSecretEncrypted?: string;
  };

  /** Settings read from the `storage_s3_*` keys. */
  s3?: {
    endpoint?: string;
    region?: string;
    bucket?: string;
    accessKey?: string;
    secretKeyEncrypted?: string;
    forcePathStyle?: boolean;
  };
}
|
|
|
|
/**
 * Fetch the value of a global setting, i.e. the `system_settings` row with
 * the given key whose `portId` is NULL.
 *
 * @param key Setting key to look up.
 * @returns The stored value cast to `T` (no runtime validation), or `null`
 *   when no row matches or the stored value is null/undefined.
 */
async function readGlobalSetting<T = unknown>(key: string): Promise<T | null> {
  const isGlobalRowForKey = and(eq(systemSettings.key, key), isNull(systemSettings.portId));
  const matches = await db.select().from(systemSettings).where(isGlobalRowForKey);
  const stored = matches[0]?.value as T | undefined;
  return stored ?? null;
}
|
|
|
|
/** Return `value` when it is a string; anything else becomes `undefined`. */
function stringSetting(value: unknown): string | undefined {
  return typeof value === 'string' ? value : undefined;
}

/**
 * Coerce a stored setting to a boolean.
 *
 * Real booleans pass through. Strings are compared case-insensitively after
 * trimming, and `''`, `'false'` and `'0'` are treated as false. A plain
 * `Boolean(value)` would turn the string `"false"` into `true`. Every other
 * value, including `null` for a missing row, falls back to JavaScript
 * truthiness.
 */
function booleanSetting(value: unknown): boolean {
  if (typeof value === 'boolean') return value;
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase();
    return normalized !== '' && normalized !== 'false' && normalized !== '0';
  }
  return Boolean(value);
}

/**
 * Read every storage-related global setting and assemble a
 * `StorageConfigSnapshot`.
 *
 * Each setting key is a separate row, so the reads are issued in parallel.
 * Values are looked up by key rather than by array position, so adding or
 * reordering a key cannot silently mismap a setting.
 *
 * @returns A snapshot in which `backend` is `'filesystem'` only when the
 *   stored value is exactly that string, and `'s3'` otherwise. String fields
 *   are `undefined` unless the stored value is a string.
 */
async function loadStorageConfig(): Promise<StorageConfigSnapshot> {
  const keys = [
    'storage_backend',
    'storage_s3_endpoint',
    'storage_s3_region',
    'storage_s3_bucket',
    'storage_s3_access_key',
    'storage_s3_secret_key_encrypted',
    'storage_s3_force_path_style',
    'storage_filesystem_root',
    'storage_proxy_hmac_secret_encrypted',
  ] as const;

  // All reads start immediately; each result stays paired with its own key.
  const entries = await Promise.all(
    keys.map(async (key) => [key, await readGlobalSetting<unknown>(key)] as const),
  );
  const settings = new Map<(typeof keys)[number], unknown>(entries);

  const backend: StorageBackendName =
    settings.get('storage_backend') === 'filesystem' ? 'filesystem' : 's3';

  // Property order is kept stable because the snapshot is serialized to build
  // the cache fingerprint.
  return {
    backend,
    s3: {
      endpoint: stringSetting(settings.get('storage_s3_endpoint')),
      region: stringSetting(settings.get('storage_s3_region')),
      bucket: stringSetting(settings.get('storage_s3_bucket')),
      accessKey: stringSetting(settings.get('storage_s3_access_key')),
      secretKeyEncrypted: stringSetting(settings.get('storage_s3_secret_key_encrypted')),
      forcePathStyle: booleanSetting(settings.get('storage_s3_force_path_style')),
    },
    filesystem: {
      root: stringSetting(settings.get('storage_filesystem_root')),
      proxyHmacSecretEncrypted: stringSetting(
        settings.get('storage_proxy_hmac_secret_encrypted'),
      ),
    },
  };
}
|
|
|
|
/**
 * Serialize a config snapshot into a string used to detect configuration
 * changes between calls.
 *
 * Encrypted-secret material is deliberately part of the fingerprint, since
 * rotating a secret should invalidate the cached client. The settings-write
 * hook already calls `resetStorageBackendCache()` explicitly after a key
 * rotation, so this comparison is only a defense-in-depth backstop and not
 * the primary invalidation path. If `loadStorageConfig` ever reads more
 * sensitive material, keep the rotation flow resetting the cache: relying on
 * a fingerprint diff alone leaves the old client in memory until the next
 * mismatch.
 *
 * @param cfg Snapshot to fingerprint.
 * @returns The JSON serialization of `cfg`.
 */
function fingerprint(cfg: StorageConfigSnapshot): string {
  const serialized = JSON.stringify(cfg);
  return serialized;
}
|
|
|
|
/**
 * Return the active storage backend, reusing the per-process instance when
 * the configuration has not changed.
 *
 * The storage settings are loaded and fingerprinted on every call. If the
 * fingerprint matches the cached entry, the cached backend is returned.
 * Otherwise a new backend is built, cached, and logged. The cache can also
 * be cleared explicitly with `resetStorageBackendCache()` (called when
 * `system_settings.storage_backend` changes via the migration flow).
 */
export async function getStorageBackend(): Promise<StorageBackend> {
  const snapshot = await loadStorageConfig();
  const currentFingerprint = fingerprint(snapshot);

  // Read the cache only after the settings load, so a concurrent reset is seen.
  const memo = cached;
  if (memo !== null && memo.configFingerprint === currentFingerprint) {
    return memo.backend;
  }

  const fresh = await buildBackend(snapshot);
  cached = { backend: fresh, configFingerprint: currentFingerprint };
  logger.info({ backend: fresh.name }, 'Storage backend resolved');
  return fresh;
}
|
|
|
|
/**
 * Construct the backend selected by `cfg.backend`.
 *
 * For the filesystem backend, the root falls back to `./storage` and the
 * encrypted proxy HMAC secret to `null` when unset. Any other selection
 * builds the S3 backend, with the S3 settings passed through as-is.
 */
async function buildBackend(cfg: StorageConfigSnapshot): Promise<StorageBackend> {
  const { backend: selected, filesystem, s3 } = cfg;

  if (selected !== 'filesystem') {
    return S3Backend.create({
      endpoint: s3?.endpoint,
      region: s3?.region,
      bucket: s3?.bucket,
      accessKey: s3?.accessKey,
      secretKeyEncrypted: s3?.secretKeyEncrypted,
      forcePathStyle: s3?.forcePathStyle,
    });
  }

  const root = filesystem?.root ?? './storage';
  const proxyHmacSecretEncrypted = filesystem?.proxyHmacSecretEncrypted ?? null;
  return FilesystemBackend.create({ root, proxyHmacSecretEncrypted });
}
|
|
|
|
// ─── url helpers ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Shortcut for call sites that only need the presigned download URL and do
 * not track its expiry. Mirrors the legacy `getPresignedUrl(key)` helper in
 * `@/lib/minio`, but goes through the active backend so filesystem-mode
 * deployments work too.
 *
 * @param key Storage key of the object to download.
 * @param expirySeconds URL lifetime in seconds (defaults to 900).
 * @param filename Optional filename forwarded to the backend.
 * @param portSlug Optional port slug forwarded to the backend.
 * @returns The presigned URL string.
 */
export async function presignDownloadUrl(
  key: string,
  expirySeconds = 900,
  filename?: string,
  portSlug?: string,
): Promise<string> {
  const presignOptions: PresignOpts = { expirySeconds, filename, portSlug };
  const storage = await getStorageBackend();
  const presigned = await storage.presignDownload(key, presignOptions);
  return presigned.url;
}
|
|
|
|
// ─── re-exports ─────────────────────────────────────────────────────────────
|
|
|
|
export { S3Backend } from './s3';
|
|
export { FilesystemBackend, validateStorageKey } from './filesystem';
|