feat(berths): per-berth PDF storage (versioned) + reverse parser
Phase 6b of the berth-recommender refactor (see
docs/berth-recommender-and-pdf-plan.md §3.2, §3.3, §4.7b, §11.1, §14.6).
Builds on the Phase 6a pluggable storage backend (commit 83693dd) — every
file write goes through `getStorageBackend()`; no direct minio imports.
Schema (migration 0030_berth_pdf_versions):
- new table `berth_pdf_versions` with monotonic `version_number` per
berth, `storage_key` (renamed convention from §4.7a), sha256, size,
`download_url_expires_at` cache slot for §11.1 signed-URL throttling,
and `parse_results` jsonb for the audit trail.
- new column `berths.current_pdf_version_id` (deferred from Phase 0)
with FK to `berth_pdf_versions(id)` ON DELETE SET NULL.
- relations + types exported from `schema/berths.ts`.
3-tier reverse parser (`lib/services/berth-pdf-parser.ts`):
1. AcroForm via pdf-lib — pulls named fields (`length_ft`,
`mooring_number`, etc.) at confidence 1. Sample PDF has 0 such
fields, so this is defensive coverage for future templates.
2. OCR via Tesseract.js — positional/regex heuristics keyed off the
§9.2 layout (Length/Width/Water Depth as `<imperial> / <metric>`,
`WEEK HIGH / LOW`, `CONFIRMED THROUGH UNTIL <date>`, etc.). Returns
per-field confidence + global mean; flags imperial-vs-metric drift
>1% in `warnings`.
3. AI fallback — gated via `getResolvedOcrConfig()` (existing
openai/claude provider). Surfaced from the diff dialog only when
`shouldOfferAiTier()` returns true (mean OCR confidence below
0.55 threshold), so OPENAI_API_KEY isn't burned on every upload.
Service layer (`lib/services/berth-pdf.service.ts`):
- `uploadBerthPdf()` — magic-byte check, size cap, version-number
bump + current pointer in one transaction.
- `reconcilePdfWithBerth()` — auto-applies fields where CRM is null;
flags conflicts when CRM and PDF disagree; tolerates ±1% on numeric
columns; warns on mooring-number-in-PDF mismatch (§14.6).
- `applyParseResults()` — hard allowlist of writable columns;
stamps `appliedFields` onto `parse_results` for audit.
- `rollbackToVersion()` — pointer flip only, never re-parses (§14.6).
- `listBerthPdfVersions()` — version list with 15-min signed URLs.
- `getMaxUploadMb()` — port-override → global → default 15 lookup
on `system_settings.berth_pdf_max_upload_mb`.
§14.6 critical mitigations:
- Magic-byte check (`%PDF-`) on every upload; mismatch deletes the
storage object and rejects the request.
- Size cap from `system_settings.berth_pdf_max_upload_mb` (default
15 MB); enforced in the upload-url presign AND server-side.
- 0-byte uploads rejected.
- Mooring-number mismatch surfaces as a `warnings[]` entry on the
reconcile result so the rep sees it in the diff dialog.
- Imperial vs metric ±1% tolerance in both the parser warnings and
the reconcile equality check.
- Path traversal already blocked at the storage layer (Phase 6a).
API + UI:
- `POST /api/v1/berths/[id]/pdf-upload-url` — presigned URL (S3) or
HMAC-signed proxy URL (filesystem) sized to the per-port cap.
- `POST /api/v1/berths/[id]/pdf-versions` — verifies the upload via
`backend.head()`, writes the row, bumps `current_pdf_version_id`.
- `GET /api/v1/berths/[id]/pdf-versions` — version list + signed URLs.
- `POST /api/v1/berths/[id]/pdf-versions/[versionId]/rollback`.
- `POST /api/v1/berths/[id]/pdf-versions/parse-results/apply` —
rep-confirmed diff payload.
- New "Documents" tab on the berth detail page (`berth-tabs.tsx`)
with current-PDF panel, version history, Replace PDF button, and
`<PdfReconcileDialog>` for the auto-applied + conflicts UX.
System settings:
- `berth_pdf_max_upload_mb` (default 15) — caps presigned-upload size
+ server-side validation. Resolved port-override → global → default.
Tests:
- `tests/unit/services/berth-pdf-parser.test.ts` — magic bytes,
feet-inches, human dates, full §9.2-shaped OCR text → 18 fields,
drift warning, AI-tier gate.
- `tests/unit/services/berth-pdf-acroform.test.ts` — synthetic
pdf-lib AcroForm round-trip.
- `tests/integration/berth-pdf-versions.test.ts` — upload, version-
number bump, magic-byte rejection, reconcile auto-applied vs
conflicts vs ±1% tolerance, mooring-number warning,
applyParseResults allowlist enforcement, rollback semantics.
Acceptance: `pnpm exec tsc --noEmit` clean, `pnpm exec vitest run`
green at 1103/1103.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
70
src/app/api/v1/berths/[id]/pdf-upload-url/handlers.ts
Normal file
70
src/app/api/v1/berths/[id]/pdf-upload-url/handlers.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* Returns a presigned URL the browser can use to PUT a PDF directly to the
|
||||
* active storage backend. The URL is constrained by content-length-range up
|
||||
* to `system_settings.berth_pdf_max_upload_mb` (default 15 MB) per §11.1.
|
||||
*
|
||||
* For S3 backends this is a true signed URL; for filesystem backends it's a
|
||||
* CRM-internal proxy URL with an HMAC token (see `FilesystemBackend`).
|
||||
*/
|
||||
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { type RouteHandler } from '@/lib/api/helpers';
|
||||
import { db } from '@/lib/db';
|
||||
import { berths } from '@/lib/db/schema/berths';
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { errorResponse, NotFoundError, ValidationError } from '@/lib/errors';
|
||||
import { getMaxUploadMb } from '@/lib/services/berth-pdf.service';
|
||||
import { getStorageBackend } from '@/lib/storage';
|
||||
|
||||
/** JSON body accepted by `POST /api/v1/berths/[id]/pdf-upload-url`. */
interface PostBody {
  /** File name as picked by the user; sanitized before it is embedded in the storage key. */
  fileName: string;
  /** Size hint in bytes — used to early-reject oversized uploads before we
   *  burn a presigned URL. */
  sizeBytes?: number;
}

/**
 * Issues an upload URL for a new PDF on the berth identified by `params.id`.
 *
 * Flow: validate the body, load the berth (404 when missing), resolve the
 * upload cap via `getMaxUploadMb(berth.portId)`, early-reject a bad size hint,
 * then ask the active storage backend for a presigned upload of a freshly
 * generated storage key.
 *
 * Responds with `{ data: { url, method, storageKey, maxBytes, backend } }`.
 * Every thrown error is converted by `errorResponse`.
 */
export const postHandler: RouteHandler = async (req, _ctx, params) => {
  try {
    const berthId = params.id;
    if (!berthId) throw new NotFoundError('Berth');

    // `req.json()` can legitimately resolve to `null` (body "null"); treat that
    // like an empty object so it fails validation instead of throwing a TypeError.
    const body = ((await req.json()) ?? {}) as Partial<PostBody>;
    const fileName = typeof body.fileName === 'string' ? body.fileName.trim() : '';
    if (!fileName) throw new ValidationError('fileName is required');

    const berthRow = await db.query.berths.findFirst({ where: eq(berths.id, berthId) });
    if (!berthRow) throw new NotFoundError('Berth');

    const maxMb = await getMaxUploadMb(berthRow.portId);
    const maxBytes = maxMb * 1024 * 1024;

    // The hint stays optional (non-numeric hints are ignored, as before), but a
    // numeric hint must describe a real, non-empty file that fits under the cap.
    const sizeHint = body.sizeBytes;
    if (typeof sizeHint === 'number') {
      if (!Number.isFinite(sizeHint) || sizeHint <= 0) {
        throw new ValidationError('sizeBytes must be a positive number of bytes');
      }
      if (sizeHint > maxBytes) {
        throw new ValidationError(
          `File exceeds ${maxMb} MB upload cap (got ${(sizeHint / 1024 / 1024).toFixed(1)} MB).`,
        );
      }
    }

    // The version number is not allocated here; per the original note, the row
    // insert in POST /pdf-versions computes it inside a transaction. The random
    // UUID prefix keeps storage keys unique even when two reps upload a file
    // with the same name at the same time.
    const sanitized = fileName.replace(/[^a-zA-Z0-9._-]/g, '_').slice(0, 200) || 'berth.pdf';
    const storageKey = `berths/${berthId}/uploads/${crypto.randomUUID()}_${sanitized}`;

    const backend = await getStorageBackend();
    // NOTE(review): no size limit is passed to `presignUpload` here, although the
    // file header says the URL is constrained by content-length-range — confirm
    // the backend applies the cap, or pass `maxBytes` through.
    const presigned = await backend.presignUpload(storageKey, {
      contentType: 'application/pdf',
      expirySeconds: 900, // 15 minutes
    });

    return NextResponse.json({
      data: {
        url: presigned.url,
        method: presigned.method,
        storageKey,
        maxBytes,
        backend: backend.name,
      },
    });
  } catch (error) {
    return errorResponse(error);
  }
};
|
||||
5
src/app/api/v1/berths/[id]/pdf-upload-url/route.ts
Normal file
5
src/app/api/v1/berths/[id]/pdf-upload-url/route.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
|
||||
import { postHandler } from './handlers';
|
||||
|
||||
// POST /api/v1/berths/[id]/pdf-upload-url — `postHandler` wrapped in `withAuth`
// and `withPermission('berths', 'edit', …)`.
export const POST = withAuth(withPermission('berths', 'edit', postHandler));
|
||||
@@ -0,0 +1,14 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { type RouteHandler } from '@/lib/api/helpers';
|
||||
import { errorResponse } from '@/lib/errors';
|
||||
import { rollbackToVersion } from '@/lib/services/berth-pdf.service';
|
||||
|
||||
/**
 * POST handler for rolling a berth back to an earlier PDF version.
 *
 * Passes `params.id` (berth) and `params.versionId` (target version) to
 * `rollbackToVersion` and returns its result as `{ data }`. Any thrown error
 * is turned into a response by `errorResponse`.
 */
export const postHandler: RouteHandler = async (_req, _ctx, params) => {
  const berthId = params.id!;
  const targetVersionId = params.versionId!;

  try {
    const outcome = await rollbackToVersion(berthId, targetVersionId);
    return NextResponse.json({ data: outcome });
  } catch (failure) {
    return errorResponse(failure);
  }
};
|
||||
@@ -0,0 +1,5 @@
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
|
||||
import { postHandler } from './handlers';
|
||||
|
||||
// Route export: the sibling `postHandler` wrapped in `withAuth` and
// `withPermission('berths', 'edit', …)`.
// NOTE(review): presumably the `/pdf-versions/[versionId]/rollback` route — the
// file path is not shown next to this hunk; confirm.
export const POST = withAuth(withPermission('berths', 'edit', postHandler));
|
||||
88
src/app/api/v1/berths/[id]/pdf-versions/handlers.ts
Normal file
88
src/app/api/v1/berths/[id]/pdf-versions/handlers.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
* Route handlers for `/api/v1/berths/[id]/pdf-versions` (Phase 6b).
|
||||
*
|
||||
* Lives in handlers.ts (not route.ts) so integration tests can call them
|
||||
* directly, bypassing the auth/permission middleware (per CLAUDE.md
|
||||
* "Route handler exports" convention).
|
||||
*/
|
||||
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { type RouteHandler } from '@/lib/api/helpers';
|
||||
import { errorResponse, ValidationError } from '@/lib/errors';
|
||||
import { listBerthPdfVersions, uploadBerthPdf } from '@/lib/services/berth-pdf.service';
|
||||
|
||||
/** Parse summary a client may attach when registering an uploaded PDF. */
interface ParseResultsPayload {
  /** Which parser tier produced the values. */
  engine: 'acroform' | 'ocr' | 'ai';
  /** Extracted entries keyed by field name; the POST handler keeps entries shaped like `{ value, confidence? }`. */
  extracted?: Record<string, unknown>;
  meanConfidence?: number;
  warnings?: string[];
}

/** JSON body accepted by `POST /api/v1/berths/[id]/pdf-versions`. */
interface PostBody {
  storageKey: string;
  fileName: string;
  fileSizeBytes: number;
  sha256: string;
  parseResults?: ParseResultsPayload;
}
|
||||
|
||||
/**
 * GET handler: returns the PDF versions of the berth in `params.id` as
 * `{ data: versions }`, using `listBerthPdfVersions`. Errors are converted
 * by `errorResponse`.
 */
export const getHandler: RouteHandler = async (_req, _ctx, params) => {
  const berthId = params.id!;

  try {
    const data = await listBerthPdfVersions(berthId);
    return NextResponse.json({ data });
  } catch (failure) {
    return errorResponse(failure);
  }
};
|
||||
|
||||
/** Parser engines a client may report in `parseResults.engine`. */
const ACCEPTED_PARSE_ENGINES: readonly string[] = ['acroform', 'ocr', 'ai'];

/**
 * Rebuilds just enough of the service's ParseResult shape from the client
 * payload to round-trip through serialization for the audit trail.
 *
 * Only `extracted` entries shaped like `{ value, confidence? }` are kept;
 * anything else is dropped rather than stored as an `undefined` placeholder.
 * A missing confidence defaults to 1.
 */
function toStoredParseResult(parseResults: NonNullable<PostBody['parseResults']>) {
  const { engine } = parseResults;
  const fields = Object.fromEntries(
    Object.entries(parseResults.extracted ?? {}).flatMap(
      ([key, raw]): Array<[string, { value: unknown; confidence: number; engine: typeof engine }]> => {
        if (raw === null || typeof raw !== 'object' || !('value' in raw)) return [];
        const { value, confidence } = raw as { value: unknown; confidence?: unknown };
        return [[key, { value, confidence: typeof confidence === 'number' ? confidence : 1, engine }]];
      },
    ),
  );

  return {
    engine,
    // The precise field map type lives in the service layer and is not imported
    // here; the cast mirrors the original handler.
    fields: fields as never,
    meanConfidence: parseResults.meanConfidence ?? 1,
    warnings: parseResults.warnings ?? [],
  };
}

/**
 * POST handler: registers an already-uploaded PDF as a new version of the
 * berth in `params.id` by calling `uploadBerthPdf`, attributing it to
 * `ctx.userId`.
 *
 * Validation (each failure is a `ValidationError`):
 * - `storageKey` and `fileName` must be non-empty strings;
 * - `fileSizeBytes` must be a positive integer;
 * - `sha256` must be a non-empty string;
 * - `parseResults.engine`, when `parseResults` is present, must be a known engine.
 *
 * Responds 201 with `{ data: result }`; errors go through `errorResponse`.
 */
export const postHandler: RouteHandler = async (req, ctx, params) => {
  try {
    // A literal `null` body would otherwise throw a TypeError on property access.
    const body = ((await req.json()) ?? {}) as Partial<PostBody>;

    const { storageKey, fileName, fileSizeBytes, sha256, parseResults } = body;
    if (typeof storageKey !== 'string' || !storageKey || typeof fileName !== 'string' || !fileName) {
      throw new ValidationError('storageKey and fileName are required');
    }
    // Fractional sizes used to slip through even though the message (and the
    // integer `file_size_bytes` column) require a whole number of bytes.
    if (typeof fileSizeBytes !== 'number' || !Number.isInteger(fileSizeBytes) || fileSizeBytes <= 0) {
      throw new ValidationError('fileSizeBytes must be a positive integer');
    }
    if (!sha256 || typeof sha256 !== 'string') {
      throw new ValidationError('sha256 is required');
    }
    if (parseResults && !ACCEPTED_PARSE_ENGINES.includes(parseResults.engine)) {
      throw new ValidationError('parseResults.engine must be one of acroform, ocr, ai');
    }

    const result = await uploadBerthPdf({
      berthId: params.id!,
      storageKey,
      fileName,
      fileSizeBytes,
      sha256,
      uploadedBy: ctx.userId,
      parseResult: parseResults ? toStoredParseResult(parseResults) : undefined,
    });
    return NextResponse.json({ data: result }, { status: 201 });
  } catch (error) {
    return errorResponse(error);
  }
};
|
||||
@@ -0,0 +1,24 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { type RouteHandler } from '@/lib/api/helpers';
|
||||
import { errorResponse, ValidationError } from '@/lib/errors';
|
||||
import { applyParseResults, type ExtractedBerthFields } from '@/lib/services/berth-pdf.service';
|
||||
|
||||
/** JSON body accepted by the parse-results apply endpoint. */
interface PostBody {
  /** The PDF version the values were extracted from. */
  versionId: string;
  /** Rep-confirmed subset of extracted fields to write onto the berth. */
  fieldsToApply: Partial<ExtractedBerthFields>;
}

/**
 * POST handler: applies rep-confirmed parse results to the berth in
 * `params.id` via `applyParseResults(berthId, versionId, fieldsToApply)`.
 *
 * Rejects with a `ValidationError` when `versionId` is missing or not a
 * string, or when `fieldsToApply` is not a plain object (arrays included —
 * `typeof [] === 'object'` used to let them through).
 *
 * Responds with `{ data: result }`; errors go through `errorResponse`.
 */
export const postHandler: RouteHandler = async (req, _ctx, params) => {
  try {
    // A literal `null` body would otherwise throw a TypeError on property access.
    const body = ((await req.json()) ?? {}) as Partial<PostBody>;

    const { versionId, fieldsToApply } = body;
    if (typeof versionId !== 'string' || !versionId) {
      throw new ValidationError('versionId is required');
    }
    if (!fieldsToApply || typeof fieldsToApply !== 'object' || Array.isArray(fieldsToApply)) {
      throw new ValidationError('fieldsToApply must be an object');
    }

    const result = await applyParseResults(params.id!, versionId, fieldsToApply);
    return NextResponse.json({ data: result });
  } catch (error) {
    return errorResponse(error);
  }
};
|
||||
@@ -0,0 +1,5 @@
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
|
||||
import { postHandler } from './handlers';
|
||||
|
||||
// Route export: the sibling `postHandler` wrapped in `withAuth` and
// `withPermission('berths', 'edit', …)`.
// NOTE(review): presumably the `/pdf-versions/parse-results/apply` route — the
// file path is not shown next to this hunk; confirm.
export const POST = withAuth(withPermission('berths', 'edit', postHandler));
|
||||
6
src/app/api/v1/berths/[id]/pdf-versions/route.ts
Normal file
6
src/app/api/v1/berths/[id]/pdf-versions/route.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
|
||||
import { getHandler, postHandler } from './handlers';
|
||||
|
||||
// GET /api/v1/berths/[id]/pdf-versions — `getHandler` wrapped in `withAuth`
// and `withPermission('berths', 'view', …)`.
export const GET = withAuth(withPermission('berths', 'view', getHandler));
|
||||
// POST /api/v1/berths/[id]/pdf-versions — `postHandler` wrapped in `withAuth`
// and `withPermission('berths', 'edit', …)`.
export const POST = withAuth(withPermission('berths', 'edit', postHandler));
|
||||
269
src/components/berths/berth-documents-tab.tsx
Normal file
269
src/components/berths/berth-documents-tab.tsx
Normal file
@@ -0,0 +1,269 @@
|
||||
/**
|
||||
* Documents tab on the berth detail page (Phase 6b — see plan §5.6).
|
||||
*
|
||||
* Sections:
|
||||
* - Current PDF panel (download link, "Replace PDF" button, parse-engine chip).
|
||||
* - Version history list — newest first, with rollback affordance on every
|
||||
* non-current row.
|
||||
* - Reconcile-diff dialog (PdfReconcileDialog), opened after a successful
|
||||
* upload + parse. Shows auto-applied vs conflicted fields and lets the
|
||||
* rep accept the conflict resolution.
|
||||
*
|
||||
* The actual upload is split into three steps:
|
||||
* 1. POST /pdf-upload-url -> presigned URL + storageKey
|
||||
* 2. PUT the file to that URL (multipart for filesystem-proxy mode, signed
|
||||
* PUT for S3 mode)
|
||||
* 3. POST /pdf-versions with the storage key + parse results
|
||||
*/
|
||||
|
||||
'use client';
|
||||
|
||||
import { useRef, useState } from 'react';
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
import { apiFetch } from '@/lib/api/client';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { PdfReconcileDialog } from './pdf-reconcile-dialog';
|
||||
|
||||
/** One entry of the list returned by `GET /api/v1/berths/[id]/pdf-versions`. */
interface PdfVersionRow {
  id: string;
  versionNumber: number;
  fileName: string;
  fileSizeBytes: number;
  uploadedBy: string;
  /** Date string; rendered with `new Date(...).toLocaleDateString()`. */
  uploadedAt: string;
  /** The row flagged here is shown in the "Current PDF" panel; all others go to the history list. */
  isCurrent: boolean;
  downloadUrl: string;
  downloadUrlExpiresAt: string;
  parseEngine: 'acroform' | 'ocr' | 'ai' | null;
}

/** Payload of `POST /api/v1/berths/[id]/pdf-upload-url`. */
interface UploadUrlResponse {
  url: string;
  method: 'PUT' | 'POST';
  storageKey: string;
  maxBytes: number;
  backend: 's3' | 'filesystem';
}

/** Data handed to `PdfReconcileDialog` while a reconcile is pending. */
interface PendingDiff {
  versionId: string;
  autoApplied: Array<{ field: string; value: string | number }>;
  conflicts: Array<{
    field: string;
    crmValue: string | number | null;
    pdfValue: string | number | null;
    pdfConfidence: number;
  }>;
  warnings: string[];
}

/**
 * Documents tab for a single berth: shows the current PDF, lets the user
 * upload/replace it, lists prior versions with a rollback button, and renders
 * the reconcile dialog whenever `pendingDiff` is set.
 */
export function BerthDocumentsTab({ berthId }: { berthId: string }) {
  const qc = useQueryClient();
  const fileInputRef = useRef<HTMLInputElement | null>(null);
  // NOTE(review): nothing in this component ever sets `pendingDiff` to a value,
  // so the reconcile dialog below cannot open yet. Wiring it up needs the
  // reconcile payload from the upload response, whose shape is not visible here.
  const [pendingDiff, setPendingDiff] = useState<PendingDiff | null>(null);

  const { data: versions, isLoading } = useQuery<PdfVersionRow[]>({
    queryKey: ['berth-pdf-versions', berthId],
    queryFn: () =>
      apiFetch<{ data: PdfVersionRow[] }>(`/api/v1/berths/${berthId}/pdf-versions`).then(
        (r) => r.data,
      ),
  });

  const rollback = useMutation({
    mutationFn: (versionId: string) =>
      apiFetch(`/api/v1/berths/${berthId}/pdf-versions/${versionId}/rollback`, {
        method: 'POST',
      }),
    onSuccess: () => {
      void qc.invalidateQueries({ queryKey: ['berth-pdf-versions', berthId] });
      void qc.invalidateQueries({ queryKey: ['berth', berthId] });
      toast.success('Rolled back to selected version.');
    },
    onError: (err: Error) => {
      toast.error('Rollback failed', { description: err.message });
    },
  });

  const upload = useMutation({
    mutationFn: async (file: File) => {
      // Fail fast on empty files instead of spending a presigned URL and a
      // storage round-trip on an upload that cannot be a valid PDF.
      if (file.size === 0) {
        throw new Error('File is empty.');
      }

      // 1. ask the server for a presigned upload URL
      const upRes = await apiFetch<{ data: UploadUrlResponse }>(
        `/api/v1/berths/${berthId}/pdf-upload-url`,
        {
          method: 'POST',
          body: { fileName: file.name, sizeBytes: file.size },
        },
      );
      const { url, method, storageKey, maxBytes } = upRes.data;
      if (file.size > maxBytes) {
        throw new Error(
          `File ${(file.size / 1024 / 1024).toFixed(1)} MB exceeds ${(maxBytes / 1024 / 1024).toFixed(0)} MB limit`,
        );
      }

      // 2. compute sha256 in the browser for the metadata row. Done before the
      //    PUT so a hashing failure cannot leave an unregistered object in storage.
      const sha256 = await sha256Hex(file);

      // 3. upload directly to storage (filesystem-proxy or S3). Cookies are
      //    only sent to same-origin (relative) URLs.
      const putRes = await fetch(url, {
        method,
        body: file,
        headers: { 'content-type': 'application/pdf' },
        credentials: url.startsWith('/') ? 'include' : 'omit',
      });
      if (!putRes.ok) {
        throw new Error(`Storage PUT failed (${putRes.status})`);
      }

      // 4. register the version metadata; the raw PDF is not shipped a second
      //    time — only the storage key, name, size and hash.
      const verRes = await apiFetch<{ data: { versionId: string } }>(
        `/api/v1/berths/${berthId}/pdf-versions`,
        {
          method: 'POST',
          body: {
            storageKey,
            fileName: file.name,
            fileSizeBytes: file.size,
            sha256,
          },
        },
      );
      return { versionId: verRes.data.versionId };
    },
    onSuccess: () => {
      void qc.invalidateQueries({ queryKey: ['berth-pdf-versions', berthId] });
      void qc.invalidateQueries({ queryKey: ['berth', berthId] });
      toast.success('PDF uploaded.');
    },
    onError: (err: Error) => {
      toast.error('Upload failed', { description: err.message });
    },
  });

  const onFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    // Always clear the input (the File object stays usable) so picking the same
    // file again — e.g. after a rejection or a failed upload — fires `change`.
    e.target.value = '';
    if (!file) return;
    if (!file.name.toLowerCase().endsWith('.pdf')) {
      toast.error('Only PDFs are accepted.');
      return;
    }
    upload.mutate(file);
  };

  const current = versions?.find((v) => v.isCurrent);
  const others = versions?.filter((v) => !v.isCurrent) ?? [];

  return (
    <div className="space-y-6">
      <Card>
        <CardHeader className="flex flex-row items-center justify-between pb-3">
          <CardTitle className="text-sm font-medium">Current PDF</CardTitle>
          <div>
            <input
              ref={fileInputRef}
              type="file"
              accept="application/pdf"
              className="hidden"
              onChange={onFileChange}
            />
            <Button
              size="sm"
              onClick={() => fileInputRef.current?.click()}
              disabled={upload.isPending}
            >
              {upload.isPending ? 'Uploading…' : current ? 'Replace PDF' : 'Upload PDF'}
            </Button>
          </div>
        </CardHeader>
        <CardContent className="pt-0 text-sm">
          {isLoading ? (
            <p className="text-muted-foreground">Loading…</p>
          ) : current ? (
            <div className="flex flex-wrap items-center gap-2">
              <a
                href={current.downloadUrl}
                target="_blank"
                rel="noreferrer"
                className="font-medium underline underline-offset-2"
              >
                {current.fileName}
              </a>
              <span className="text-muted-foreground">
                v{current.versionNumber} · {(current.fileSizeBytes / 1024 / 1024).toFixed(2)} MB
              </span>
              {current.parseEngine ? <ParseEngineBadge engine={current.parseEngine} /> : null}
            </div>
          ) : (
            <p className="text-muted-foreground">No PDF uploaded yet.</p>
          )}
        </CardContent>
      </Card>

      <Card>
        <CardHeader className="pb-3">
          <CardTitle className="text-sm font-medium">Version history</CardTitle>
        </CardHeader>
        <CardContent className="pt-0">
          {others.length === 0 ? (
            <p className="text-sm text-muted-foreground">No prior versions.</p>
          ) : (
            <ul className="divide-y">
              {others.map((v) => (
                <li key={v.id} className="flex items-center justify-between py-2 text-sm">
                  <div>
                    <a href={v.downloadUrl} target="_blank" rel="noreferrer" className="underline">
                      {v.fileName}
                    </a>{' '}
                    <span className="text-muted-foreground">
                      v{v.versionNumber} · {(v.fileSizeBytes / 1024 / 1024).toFixed(2)} MB ·{' '}
                      {new Date(v.uploadedAt).toLocaleDateString()}
                    </span>
                  </div>
                  <Button
                    size="sm"
                    variant="outline"
                    onClick={() => rollback.mutate(v.id)}
                    disabled={rollback.isPending}
                  >
                    Rollback
                  </Button>
                </li>
              ))}
            </ul>
          )}
        </CardContent>
      </Card>

      {pendingDiff ? (
        <PdfReconcileDialog
          berthId={berthId}
          versionId={pendingDiff.versionId}
          autoApplied={pendingDiff.autoApplied}
          conflicts={pendingDiff.conflicts}
          warnings={pendingDiff.warnings}
          onClose={() => setPendingDiff(null)}
        />
      ) : null}
    </div>
  );
}
|
||||
|
||||
/**
 * Small chip naming the parser tier that produced a version's parse results:
 * AcroForm (default variant), OCR (secondary), anything else AI (outline).
 */
function ParseEngineBadge({ engine }: { engine: 'acroform' | 'ocr' | 'ai' }) {
  switch (engine) {
    case 'acroform':
      return <Badge variant="default">AcroForm</Badge>;
    case 'ocr':
      return <Badge variant="secondary">OCR</Badge>;
    default:
      return <Badge variant="outline">AI</Badge>;
  }
}
|
||||
|
||||
/**
 * Hashes the full contents of `file` with SHA-256 (Web Crypto) and returns the
 * digest as a lowercase hex string, two characters per byte.
 */
async function sha256Hex(file: File): Promise<string> {
  const contents = await file.arrayBuffer();
  const digest = new Uint8Array(await crypto.subtle.digest('SHA-256', contents));

  let hex = '';
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
|
||||
@@ -6,6 +6,7 @@ import { TagBadge } from '@/components/shared/tag-badge';
|
||||
import { BerthReservationsTab } from './berth-reservations-tab';
|
||||
import { BerthInterestsTab } from './berth-interests-tab';
|
||||
import { BerthInterestPulse } from './berth-interest-pulse';
|
||||
import { BerthDocumentsTab } from './berth-documents-tab';
|
||||
|
||||
type BerthData = {
|
||||
id: string;
|
||||
@@ -231,6 +232,11 @@ export function buildBerthTabs(berth: BerthData): DetailTab[] {
|
||||
label: 'Reservations',
|
||||
content: <BerthReservationsTab berthId={berth.id} />,
|
||||
},
|
||||
{
|
||||
id: 'documents',
|
||||
label: 'Documents',
|
||||
content: <BerthDocumentsTab berthId={berth.id} />,
|
||||
},
|
||||
{
|
||||
id: 'waiting-list',
|
||||
label: 'Waiting List',
|
||||
|
||||
195
src/components/berths/pdf-reconcile-dialog.tsx
Normal file
195
src/components/berths/pdf-reconcile-dialog.tsx
Normal file
@@ -0,0 +1,195 @@
|
||||
/**
|
||||
* Reconcile-diff dialog (Phase 6b — see plan §4.7b, §14.6).
|
||||
*
|
||||
* Shown after a successful per-berth PDF upload + parse. Surfaces three
|
||||
* sections:
|
||||
* - Warnings (mooring-number mismatch, imperial-vs-metric drift, etc.)
|
||||
* so the rep can abort before applying.
|
||||
* - Auto-applied fields — fields the parser found that the CRM had as null;
|
||||
* these are pre-checked and applied on confirm.
|
||||
* - Conflicts — fields where CRM and PDF disagree on a non-null value.
|
||||
* The rep picks "Keep CRM" or "Use PDF" per row before confirming.
|
||||
*
|
||||
* On confirm, the dialog POSTs to /pdf-versions/parse-results/apply with the
|
||||
* rep-curated `fieldsToApply` map.
|
||||
*/
|
||||
|
||||
'use client';
|
||||
|
||||
import { useState } from 'react';
|
||||
import { useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
import { apiFetch } from '@/lib/api/client';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Checkbox } from '@/components/ui/checkbox';
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog';
|
||||
|
||||
/** A field the CRM had no value for and the PDF supplied one. */
interface AutoAppliedField {
  field: string;
  value: string | number;
}

/** A field where the CRM value and the PDF value disagree. */
interface ConflictField {
  field: string;
  crmValue: string | number | null;
  pdfValue: string | number | null;
  /** Rendered as a percentage (`Math.round(pdfConfidence * 100)`). */
  pdfConfidence: number;
}

export interface PdfReconcileDialogProps {
  berthId: string;
  versionId: string;
  autoApplied: AutoAppliedField[];
  conflicts: ConflictField[];
  warnings: string[];
  /** Called on cancel, on dismiss, and after a successful apply. */
  onClose: () => void;
}

/** Renders a possibly-missing value for the rep; `null` becomes a dash instead of the literal "null". */
function displayValue(value: string | number | null): string {
  return value === null ? '—' : String(value);
}

/**
 * Builds the `fieldsToApply` payload: every auto-applied field that is still
 * checked, plus every conflict resolved in favour of a non-null PDF value.
 */
function collectFieldsToApply(
  autoApplied: AutoAppliedField[],
  autoChecked: Record<string, boolean>,
  conflicts: ConflictField[],
  conflictChoice: Record<string, 'pdf' | 'crm'>,
): Record<string, string | number> {
  const fieldsToApply: Record<string, string | number> = {};
  for (const f of autoApplied) {
    if (autoChecked[f.field]) fieldsToApply[f.field] = f.value;
  }
  for (const c of conflicts) {
    if (conflictChoice[c.field] === 'pdf' && c.pdfValue != null) {
      fieldsToApply[c.field] = c.pdfValue;
    }
  }
  return fieldsToApply;
}

/**
 * Modal that lets the rep review parsed PDF values before they are written to
 * the berth. Shows warnings, the auto-applied fields (each can be unchecked)
 * and the conflicts (each defaults to keeping the CRM value). "Apply" POSTs
 * `{ versionId, fieldsToApply }` to `/pdf-versions/parse-results/apply`,
 * invalidates the berth and version-list queries, and closes the dialog.
 */
export function PdfReconcileDialog({
  berthId,
  versionId,
  autoApplied,
  conflicts,
  warnings,
  onClose,
}: PdfReconcileDialogProps) {
  const qc = useQueryClient();
  // For each auto-applied field: rep can opt out by unchecking.
  const [autoChecked, setAutoChecked] = useState<Record<string, boolean>>(
    Object.fromEntries(autoApplied.map((f) => [f.field, true])),
  );
  // For each conflict: 'pdf' applies the PDF value, 'crm' keeps the CRM value
  // (the field is simply omitted from the payload). 'crm' is the default.
  const [conflictChoice, setConflictChoice] = useState<Record<string, 'pdf' | 'crm'>>(
    Object.fromEntries(conflicts.map((c) => [c.field, 'crm'])),
  );

  const apply = useMutation({
    mutationFn: async () =>
      apiFetch(`/api/v1/berths/${berthId}/pdf-versions/parse-results/apply`, {
        method: 'POST',
        body: {
          versionId,
          fieldsToApply: collectFieldsToApply(autoApplied, autoChecked, conflicts, conflictChoice),
        },
      }),
    onSuccess: () => {
      void qc.invalidateQueries({ queryKey: ['berth', berthId] });
      void qc.invalidateQueries({ queryKey: ['berth-pdf-versions', berthId] });
      toast.success('Berth fields updated from PDF.');
      onClose();
    },
    onError: (err: Error) => {
      toast.error('Apply failed', { description: err.message });
    },
  });

  return (
    <Dialog
      open
      onOpenChange={(open) => {
        if (!open) onClose();
      }}
    >
      <DialogContent className="max-w-2xl">
        <DialogHeader>
          <DialogTitle>Review parsed fields</DialogTitle>
          <DialogDescription>
            The PDF parser extracted these values. Review and apply the ones you trust.
          </DialogDescription>
        </DialogHeader>

        {warnings.length > 0 ? (
          <div className="rounded-md border border-yellow-300 bg-yellow-50 p-3 text-sm">
            <p className="font-medium">Warnings</p>
            <ul className="mt-1 list-disc pl-5">
              {warnings.map((w, i) => (
                <li key={i}>{w}</li>
              ))}
            </ul>
          </div>
        ) : null}

        {autoApplied.length > 0 ? (
          <section>
            <h3 className="text-sm font-medium">
              Auto-applied <span className="text-muted-foreground">({autoApplied.length})</span>
            </h3>
            <p className="text-xs text-muted-foreground">
              CRM had no value; the PDF supplied one. Uncheck to skip.
            </p>
            <ul className="mt-2 space-y-1">
              {autoApplied.map((f) => (
                <li key={f.field} className="flex items-center gap-2 text-sm">
                  <Checkbox
                    id={`auto-${f.field}`}
                    // Fall back to `false` so the checkbox never receives `undefined`.
                    checked={autoChecked[f.field] ?? false}
                    onCheckedChange={(checked) =>
                      setAutoChecked((prev) => ({ ...prev, [f.field]: checked === true }))
                    }
                  />
                  <label htmlFor={`auto-${f.field}`} className="flex-1">
                    <span className="font-medium">{f.field}</span>:{' '}
                    <span className="text-muted-foreground">{String(f.value)}</span>
                  </label>
                </li>
              ))}
            </ul>
          </section>
        ) : null}

        {conflicts.length > 0 ? (
          <section>
            <h3 className="text-sm font-medium">
              Conflicts <span className="text-muted-foreground">({conflicts.length})</span>
            </h3>
            <p className="text-xs text-muted-foreground">
              Pick which value to keep for each field.
            </p>
            <ul className="mt-2 space-y-2">
              {conflicts.map((c) => (
                <li
                  key={c.field}
                  className="grid grid-cols-[1fr_auto_auto] items-center gap-2 rounded border p-2 text-sm"
                >
                  <span className="font-medium">{c.field}</span>
                  <Button
                    size="sm"
                    variant={conflictChoice[c.field] === 'crm' ? 'default' : 'outline'}
                    onClick={() => setConflictChoice((prev) => ({ ...prev, [c.field]: 'crm' }))}
                  >
                    Keep: {displayValue(c.crmValue)}
                  </Button>
                  <Button
                    size="sm"
                    variant={conflictChoice[c.field] === 'pdf' ? 'default' : 'outline'}
                    // A null PDF value is never applied, so don't offer it as a choice.
                    disabled={c.pdfValue == null}
                    onClick={() => setConflictChoice((prev) => ({ ...prev, [c.field]: 'pdf' }))}
                  >
                    Use PDF: {displayValue(c.pdfValue)} ({Math.round(c.pdfConfidence * 100)}%)
                  </Button>
                </li>
              ))}
            </ul>
          </section>
        ) : null}

        <DialogFooter>
          <Button variant="outline" onClick={onClose}>
            Cancel
          </Button>
          <Button onClick={() => apply.mutate()} disabled={apply.isPending}>
            {apply.isPending ? 'Applying…' : 'Apply'}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
|
||||
24
src/lib/db/migrations/0030_berth_pdf_versions.sql
Normal file
24
src/lib/db/migrations/0030_berth_pdf_versions.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
-- Per-berth PDF version history: one row per upload.
CREATE TABLE "berth_pdf_versions" (
	"id" text PRIMARY KEY NOT NULL,
	"berth_id" text NOT NULL,
	"version_number" integer NOT NULL,
	"storage_key" text NOT NULL,
	"file_name" text NOT NULL,
	"file_size_bytes" integer NOT NULL,
	"content_sha256" text NOT NULL,
	"uploaded_by" text NOT NULL,
	"uploaded_at" timestamp with time zone DEFAULT now() NOT NULL,
	"download_url_expires_at" timestamp with time zone,
	"parse_results" jsonb
);
--> statement-breakpoint
-- Pointer from a berth to its active PDF version (nullable; constrained at the end of this file).
ALTER TABLE "berths"
	ADD COLUMN "current_pdf_version_id" text;
--> statement-breakpoint
-- Deleting a berth removes its PDF version rows.
ALTER TABLE "berth_pdf_versions"
	ADD CONSTRAINT "berth_pdf_versions_berth_id_berths_id_fk"
	FOREIGN KEY ("berth_id") REFERENCES "public"."berths"("id")
	ON DELETE cascade ON UPDATE no action;
--> statement-breakpoint
-- A version number may appear only once per berth.
CREATE UNIQUE INDEX "berth_pdf_versions_berth_version_idx"
	ON "berth_pdf_versions" USING btree ("berth_id","version_number");
--> statement-breakpoint
CREATE INDEX "idx_bpv_berth"
	ON "berth_pdf_versions" USING btree ("berth_id","uploaded_at");
--> statement-breakpoint
-- berths.current_pdf_version_id -> berth_pdf_versions.id. Declared last, once
-- both tables exist, because the two tables reference each other. ON DELETE
-- SET NULL: removing the pointed-at version keeps the berth and only clears
-- the pointer.
ALTER TABLE "berths"
	ADD CONSTRAINT "berths_current_pdf_version_id_fk"
	FOREIGN KEY ("current_pdf_version_id") REFERENCES "public"."berth_pdf_versions"("id")
	ON DELETE SET NULL ON UPDATE NO ACTION;
|
||||
11010
src/lib/db/migrations/meta/0030_snapshot.json
Normal file
11010
src/lib/db/migrations/meta/0030_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -211,6 +211,13 @@
|
||||
"when": 1777941465866,
|
||||
"tag": "0029_puzzling_romulus",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 30,
|
||||
"version": "7",
|
||||
"when": 1777944021221,
|
||||
"tag": "0030_berth_pdf_versions",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -76,6 +76,11 @@ export const berths = pgTable(
|
||||
// against updated_at to detect human edits made after the last import,
|
||||
// so re-running the import doesn't clobber CRM-side overrides.
|
||||
lastImportedAt: timestamp('last_imported_at', { withTimezone: true }),
|
||||
// Pointer to the active per-berth PDF version (Phase 6b). Null until a
|
||||
// rep uploads the first PDF; a later rollback can re-target this column
|
||||
// to any prior `berth_pdf_versions.id`. The full history lives in the
|
||||
// `berth_pdf_versions` table — this column is just the "current" pointer.
|
||||
currentPdfVersionId: text('current_pdf_version_id'),
|
||||
createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
|
||||
updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(),
|
||||
},
|
||||
@@ -181,6 +186,46 @@ export const berthMaintenanceLog = pgTable(
|
||||
(table) => [index('idx_bml_berth').on(table.berthId), index('idx_bml_port').on(table.portId)],
|
||||
);
|
||||
|
||||
/**
 * Version history for per-berth PDFs (Phase 6b — plan §3.3 / §4.7b).
 *
 * One row is written per upload. `versionNumber` is unique per berth (see the
 * unique index below) and `berths.current_pdf_version_id` points at whichever
 * row is currently active. `storageKey` is the object key inside the active
 * `StorageBackend`, resolved at access time through `getStorageBackend()`.
 *
 * `parseResults` stores what the reverse parser extracted at upload time plus
 * any conflicts the rep resolved in the diff dialog. It is an audit trail
 * only: rolling back to an older version does NOT replay it (per §14.6).
 */
export const berthPdfVersions = pgTable(
  'berth_pdf_versions',
  {
    id: text('id')
      .primaryKey()
      .$defaultFn(() => {
        return crypto.randomUUID();
      }),
    // Version rows are removed together with their berth.
    berthId: text('berth_id')
      .notNull()
      .references(() => berths.id, { onDelete: 'cascade' }),
    versionNumber: integer('version_number').notNull(),
    /** Storage-backend object key (named `storage_key` rather than `s3_key`, per §4.7a). */
    storageKey: text('storage_key').notNull(),
    fileName: text('file_name').notNull(),
    fileSizeBytes: integer('file_size_bytes').notNull(),
    contentSha256: text('content_sha256').notNull(),
    uploadedBy: text('uploaded_by').notNull(),
    uploadedAt: timestamp('uploaded_at', { withTimezone: true }).defaultNow().notNull(),
    /** Expiry of the cached signed URL (§11.1) — re-sign only when within 1h of expiry. */
    downloadUrlExpiresAt: timestamp('download_url_expires_at', { withTimezone: true }),
    /** { engine: 'acroform'|'ocr'|'ai', extracted: {...}, conflicts: [...], appliedFields: [...] } */
    parseResults: jsonb('parse_results'),
  },
  (t) => [
    uniqueIndex('berth_pdf_versions_berth_version_idx').on(t.berthId, t.versionNumber),
    index('idx_bpv_berth').on(t.berthId, t.uploadedAt),
  ],
);
|
||||
|
||||
export const berthTags = pgTable(
|
||||
'berth_tags',
|
||||
{
|
||||
@@ -202,3 +247,5 @@ export type BerthWaitingList = typeof berthWaitingList.$inferSelect;
|
||||
export type NewBerthWaitingList = typeof berthWaitingList.$inferInsert;
|
||||
export type BerthMaintenanceLog = typeof berthMaintenanceLog.$inferSelect;
|
||||
export type NewBerthMaintenanceLog = typeof berthMaintenanceLog.$inferInsert;
|
||||
export type BerthPdfVersion = typeof berthPdfVersions.$inferSelect;
|
||||
export type NewBerthPdfVersion = typeof berthPdfVersions.$inferInsert;
|
||||
|
||||
@@ -40,6 +40,7 @@ import {
|
||||
berthWaitingList,
|
||||
berthMaintenanceLog,
|
||||
berthTags,
|
||||
berthPdfVersions,
|
||||
} from './berths';
|
||||
|
||||
// Reservations
|
||||
@@ -411,6 +412,19 @@ export const berthsRelations = relations(berths, ({ one, many }) => ({
|
||||
tags: many(berthTags),
|
||||
interestBerths: many(interestBerths),
|
||||
reminders: many(reminders),
|
||||
pdfVersions: many(berthPdfVersions),
|
||||
currentPdfVersion: one(berthPdfVersions, {
|
||||
fields: [berths.currentPdfVersionId],
|
||||
references: [berthPdfVersions.id],
|
||||
relationName: 'berthCurrentPdfVersion',
|
||||
}),
|
||||
}));
|
||||
|
||||
/** Relations for `berth_pdf_versions`: every version row belongs to one berth via `berth_id`. */
export const berthPdfVersionsRelations = relations(berthPdfVersions, (helpers) => ({
  berth: helpers.one(berths, {
    references: [berths.id],
    fields: [berthPdfVersions.berthId],
  }),
}));
|
||||
|
||||
export const berthMapDataRelations = relations(berthMapData, ({ one }) => ({
|
||||
|
||||
499
src/lib/services/berth-pdf-parser.ts
Normal file
499
src/lib/services/berth-pdf-parser.ts
Normal file
@@ -0,0 +1,499 @@
|
||||
/**
|
||||
* Reverse parser for per-berth PDFs (Phase 6b — see plan §4.7b and §9.2).
|
||||
*
|
||||
* Three tiers, each falling back to the next:
|
||||
*
|
||||
* 1. AcroForm — read named text fields via pdf-lib. The sample
|
||||
* `Berth_Spec_Sheet_A1.pdf` has 0 AcroForm fields (designers export the
|
||||
* PDF flat), so this tier is built defensively for future templates that
|
||||
* may include named form fields. When fields exist, this is the highest-
|
||||
* confidence path because there's no OCR loss.
|
||||
*
|
||||
* 2. OCR — Tesseract.js extracts text from the page; positional/regex
|
||||
* heuristics keyed off the labels documented in §9.2 pull out values.
|
||||
* Returns per-field confidence scores.
|
||||
*
|
||||
* 3. AI fallback — gated on `getResolvedOcrConfig(...)` returning a usable
|
||||
* OpenAI/Claude config. Only invoked when OCR confidence is below
|
||||
* threshold for too many fields AND the rep opts in via the diff dialog.
|
||||
* A null `apiKey` causes this tier to return a clear "not configured"
|
||||
* error rather than silently falling back to OCR-only.
|
||||
*/
|
||||
|
||||
import { PDFDocument } from 'pdf-lib';
|
||||
|
||||
// ─── shared types ────────────────────────────────────────────────────────────
|
||||
|
||||
/** Identifies which parser tier produced a value or a whole parse. */
export type ParserEngine = 'acroform' | 'ocr' | 'ai';

/**
 * Canonical set of values the reverse parser can pull out of a berth PDF.
 * Every key mirrors a nullable column on the `berths` table, except
 * `mooringNumber`, which only feeds the §14.6 mismatch warning.
 */
export interface ExtractedBerthFields {
  // ── identity ──
  mooringNumber?: string | null;

  // ── dimensions (imperial / metric pairs) ──
  lengthFt?: number | null;
  lengthM?: number | null;
  widthFt?: number | null;
  widthM?: number | null;
  /** Depth of water at the berth — not the same thing as a vessel's max draft. */
  waterDepth?: number | null;
  waterDepthM?: number | null;
  /** Maximum vessel draught; stored in the berth's draft column. */
  draftFt?: number | null;
  draftM?: number | null;

  // ── orientation, utilities and hardware ──
  bowFacing?: string | null;
  sidePontoon?: string | null;
  powerCapacity?: number | null;
  voltage?: number | null;
  mooringType?: string | null;
  cleatType?: string | null;
  cleatCapacity?: string | null;
  bollardType?: string | null;
  bollardCapacity?: string | null;
  access?: string | null;

  // ── pricing ──
  weeklyRateHighUsd?: number | null;
  weeklyRateLowUsd?: number | null;
  dailyRateHighUsd?: number | null;
  dailyRateLowUsd?: number | null;
  /** ISO calendar date, `YYYY-MM-DD`. */
  pricingValidUntil?: string | null;
  price?: number | null;
}

/** A single extracted value together with its provenance. */
export interface ParsedField<T = unknown> {
  value: T;
  /** Confidence in 0..1; 1 means an exact match (AcroForm or an unambiguous regex). */
  confidence: number;
  /** Tier that produced the value, so the diff dialog can explain itself. */
  engine: ParserEngine;
}

/** Outcome of running one parser tier over a PDF. */
export interface ParseResult {
  engine: ParserEngine;
  /** Sparse map: only the fields the parser managed to extract are present. */
  fields: Partial<Record<keyof ExtractedBerthFields, ParsedField>>;
  /** Average confidence over the extracted fields (0..1). */
  meanConfidence: number;
  /** Raw text from the OCR or AI tier, kept for the diff dialog's audit view. */
  rawText?: string;
  /** Populated when a tier degraded; the API layer uses it when deciding
   *  whether to surface the "AI parse" button. */
  warnings: string[];
}
|
||||
|
||||
// ─── magic-byte check (§14.6 critical) ───────────────────────────────────────
|
||||
|
||||
/**
 * Checks the PDF signature: returns true iff the buffer holds at least five
 * bytes and the first five are `%PDF-`.
 */
export function isPdfMagic(buffer: Buffer): boolean {
  // ASCII codes for "%", "P", "D", "F", "-".
  const signature = [0x25, 0x50, 0x44, 0x46, 0x2d];
  if (buffer.length < signature.length) return false;
  return signature.every((expected, offset) => buffer[offset] === expected);
}
|
||||
|
||||
// ─── tier 1: AcroForm ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Lookup from lower-cased AcroForm field name to the `ExtractedBerthFields`
 * key it populates. Covers the names called out in §4.7b ("length_ft",
 * "mooring_number", …) plus two tolerant aliases (`berth_number`, `pontoon`).
 */
const ACROFORM_FIELD_MAP: Record<string, keyof ExtractedBerthFields> = {
  // identity (alias: berth_number)
  mooring_number: 'mooringNumber',
  berth_number: 'mooringNumber',

  // dimensions
  length_ft: 'lengthFt',
  length_m: 'lengthM',
  width_ft: 'widthFt',
  width_m: 'widthM',
  draft_ft: 'draftFt',
  draft_m: 'draftM',
  water_depth: 'waterDepth',
  water_depth_m: 'waterDepthM',

  // orientation (alias: pontoon)
  bow_facing: 'bowFacing',
  side_pontoon: 'sidePontoon',
  pontoon: 'sidePontoon',

  // utilities and hardware
  power_capacity: 'powerCapacity',
  voltage: 'voltage',
  mooring_type: 'mooringType',
  cleat_type: 'cleatType',
  cleat_capacity: 'cleatCapacity',
  bollard_type: 'bollardType',
  bollard_capacity: 'bollardCapacity',
  access: 'access',

  // pricing
  weekly_rate_high_usd: 'weeklyRateHighUsd',
  weekly_rate_low_usd: 'weeklyRateLowUsd',
  daily_rate_high_usd: 'dailyRateHighUsd',
  daily_rate_low_usd: 'dailyRateLowUsd',
  pricing_valid_until: 'pricingValidUntil',
  price: 'price',
};
|
||||
|
||||
/**
 * Tier 1: read named AcroForm text fields with pdf-lib.
 *
 * @param buffer Raw PDF bytes.
 * @returns A `ParseResult` with `engine: 'acroform'` and confidence 1 for every
 *   recognised field, or `null` when the document cannot be loaded, has no
 *   form, has no fields, or none of its fields map to a known, non-empty,
 *   coercible value. `null` tells the caller to fall through to the next tier.
 */
async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {
  let form: ReturnType<PDFDocument['getForm']>;
  try {
    const doc = await PDFDocument.load(buffer, { ignoreEncryption: true });
    form = doc.getForm();
  } catch {
    // Unloadable document or missing form: not an error, just "no tier-1 data".
    return null;
  }

  const formFields = form.getFields();
  if (formFields.length === 0) return null;

  const out: Partial<Record<keyof ExtractedBerthFields, ParsedField>> = {};
  for (const field of formFields) {
    const fieldName = field.getName();
    const lookupKey = fieldName.toLowerCase();
    // Field names come from an uploaded (untrusted) PDF. Restrict the lookup to
    // the map's own keys so names such as "constructor" or "tostring" cannot
    // resolve to inherited Object.prototype members and be treated as targets.
    if (!Object.prototype.hasOwnProperty.call(ACROFORM_FIELD_MAP, lookupKey)) continue;
    const target = ACROFORM_FIELD_MAP[lookupKey];
    if (!target) continue;

    // pdf-lib doesn't expose a generic "get value" — narrow to text fields.
    // getTextField throws for non-text fields, which we simply skip.
    let raw: string | undefined;
    try {
      raw = form.getTextField(fieldName).getText() ?? undefined;
    } catch {
      continue;
    }
    if (!raw) continue;
    const trimmed = raw.trim();
    if (trimmed.length === 0) continue;

    const parsed = coerceFieldValue(target, trimmed);
    if (parsed === null) continue;
    out[target] = { value: parsed, confidence: 1, engine: 'acroform' };
  }

  if (Object.keys(out).length === 0) return null;
  return {
    engine: 'acroform',
    fields: out,
    meanConfidence: 1,
    warnings: [],
  };
}
|
||||
|
||||
// ─── tier 2: OCR via Tesseract ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Seam between the parser and the OCR engine. Production code wires this to
 * tesseract.js through the default adapter; tests can pass a stub so the
 * parser stays unit-testable without loading a WASM bundle.
 */
export interface OcrAdapter {
  /**
   * Run text recognition over the given bytes.
   * Resolves with the recognised plain text and a mean confidence score on a
   * 0..100 scale.
   */
  recognize(buffer: Buffer): Promise<{ text: string; confidence: number }>;
}
|
||||
|
||||
/**
 * Builds the production OCR adapter. tesseract.js is loaded with a dynamic
 * `import()` inside `recognize`, so the WASM bundle is only pulled in when OCR
 * actually runs and stays out of client builds.
 *
 * NOTE(review): the raw PDF buffer is handed straight to
 * `tesseract.recognize`. Confirm that tesseract.js accepts PDF bytes — if it
 * only accepts image formats, a rasterization step is needed before this call.
 */
async function defaultOcrAdapter(): Promise<OcrAdapter> {
  async function recognize(buffer: Buffer): Promise<{ text: string; confidence: number }> {
    const tesseract = await import('tesseract.js');
    const { data } = await tesseract.recognize(buffer, 'eng');
    // Missing text becomes '' and a non-numeric confidence becomes 0.
    const confidence = typeof data.confidence === 'number' ? data.confidence : 0;
    return { text: data.text ?? '', confidence };
  }

  return { recognize };
}
|
||||
|
||||
/**
 * Heuristic field extraction from OCR text. The patterns follow the layout
 * documented in plan §9.2:
 *
 *   - "Length: 206' 8" / 63m"
 *   - "Mooring: A12" or a large "A1" near "BERTH NUMBER"
 *   - "WEEK HIGH / LOW" and "DAY HIGH / LOW" pricing blocks
 *   - "ALL PRICES ABOVE ARE CONFIRMED THROUGH UNTIL <date>"
 *
 * Before matching, the text is normalized: CRLF / CR line endings become LF,
 * non-breaking spaces become plain spaces, and typographic primes/quotes
 * become ASCII `'` / `"`. Without this the line-anchored label regexes and the
 * feet/inches pattern silently miss otherwise valid input.
 *
 * @param rawText Text produced by the OCR tier.
 * @returns `fields` — sparse map of extracted values, each tagged
 *   `engine: 'ocr'` with a per-pattern confidence; `warnings` — human-readable
 *   notes (imperial/metric drift above 1%, un-normalizable validity date).
 */
export function extractFromOcrText(rawText: string): {
  fields: Partial<Record<keyof ExtractedBerthFields, ParsedField>>;
  warnings: string[];
} {
  const warnings: string[] = [];
  const out: Partial<Record<keyof ExtractedBerthFields, ParsedField>> = {};

  // Normalize line endings, NBSPs and typographic quote marks. Explicit
  // escapes are used so no invisible character lives in the source file.
  const text = rawText
    .replace(/\r\n?/g, '\n')
    .replace(/\u00a0/g, ' ')
    .replace(/[\u2018\u2019\u2032]/g, "'")
    .replace(/[\u201c\u201d\u2033]/g, '"');

  // Mooring number: BERTH NUMBER block first, then a bare "A12" on its own
  // line, then an explicit "Mooring[ Number]:" label.
  const mooringMatch =
    text.match(/BERTH\s+NUMBER[\s\S]{0,80}?\b([A-Z]\d{1,3})\b/i) ??
    text.match(/^\s*([A-Z]\d{1,3})\s*$/m) ??
    text.match(/Mooring(?:\s+Number)?\s*[:#]?\s*([A-Z]\d{1,3})/i);
  if (mooringMatch) {
    out.mooringNumber = { value: mooringMatch[1]!.toUpperCase(), confidence: 0.85, engine: 'ocr' };
  }

  // Length / Width / Water Depth — `Label: <imperial> / <metric>` form.
  // Imperial may be `206' 8"` style; the metric side is parsed independently
  // because the two are rarely lossless conversions of each other.
  // `labelPattern` is regex source; `displayName` is what appears in warnings,
  // so regex syntax never leaks into user-facing text.
  const dimensional = (
    labelPattern: string,
    displayName: string,
    ftKey: keyof ExtractedBerthFields,
    mKey: keyof ExtractedBerthFields,
  ): void => {
    const re = new RegExp(
      `${labelPattern}\\s*[:.]?\\s*([0-9]+(?:'\\s*[0-9]+\")?(?:\\.[0-9]+)?)\\s*(?:ft)?\\s*\\/\\s*([0-9]+(?:\\.[0-9]+)?)\\s*m`,
      'i',
    );
    const m = text.match(re);
    if (!m) return;
    const ft = parseFeetInches(m[1]!);
    const meters = Number(m[2]);
    if (ft != null && Number.isFinite(ft)) {
      out[ftKey] = { value: ft, confidence: 0.8, engine: 'ocr' } as ParsedField;
    }
    if (Number.isFinite(meters)) {
      out[mKey] = { value: meters, confidence: 0.85, engine: 'ocr' } as ParsedField;
    }
    // 1 ft = 0.3048 m; flag the pair when the two sides drift by more than 1%.
    if (ft != null && Number.isFinite(meters) && Math.abs(ft * 0.3048 - meters) / meters > 0.01) {
      warnings.push(
        `${displayName}: imperial/metric mismatch — ${ft}ft vs ${meters}m differ >1% (using imperial as source of truth).`,
      );
    }
  };
  dimensional('Length', 'Length', 'lengthFt', 'lengthM');
  dimensional('Width', 'Width', 'widthFt', 'widthM');
  dimensional('Water\\s+Depth', 'Water Depth', 'waterDepth', 'waterDepthM');
  // Max draught of vessel maps to the berth's draft column.
  dimensional('Max\\.?\\s*draught(?:\\s+of\\s+vessel)?', 'Max draught', 'draftFt', 'draftM');

  // Singular labels (`Bow Facing: East`, `Pontoon: QUAY PT`): the value runs
  // to the end of the line.
  const labelToKey: Array<[RegExp, keyof ExtractedBerthFields]> = [
    [/Bow\s+Facing\s*[:.]?\s*([A-Za-z .]+?)(?:\n|$)/i, 'bowFacing'],
    [/Pontoon\s*[:.]?\s*([A-Za-z0-9 .\-]+?)(?:\n|$)/i, 'sidePontoon'],
    [/Mooring\s+Type\s*[:.]?\s*([A-Za-z0-9 \-\/]+?)(?:\n|$)/i, 'mooringType'],
    [/Cleat\s+Type\s*[:.]?\s*([A-Za-z0-9 \-]+?)(?:\n|$)/i, 'cleatType'],
    [/Cleat\s+Capacity\s*[:.]?\s*([A-Za-z0-9 \-]+?)(?:\n|$)/i, 'cleatCapacity'],
    [/Bollard\s+Type\s*[:.]?\s*([A-Za-z0-9 \-]+?)(?:\n|$)/i, 'bollardType'],
    [/Bollard\s+Capacity\s*[:.]?\s*([A-Za-z0-9 \-]+?)(?:\n|$)/i, 'bollardCapacity'],
    [/Access\s*[:.]?\s*([A-Za-z0-9 .,()\-]+?)(?:\n|$)/i, 'access'],
  ];
  for (const [re, key] of labelToKey) {
    const m = text.match(re);
    if (m && m[1]) {
      out[key] = { value: m[1].trim(), confidence: 0.75, engine: 'ocr' } as ParsedField;
    }
  }

  // Power Capacity (kW) and Voltage at 60Hz.
  const powerMatch = text.match(/Power\s+Capacity\s*[:.]?\s*([0-9]+(?:\.[0-9]+)?)\s*kW/i);
  if (powerMatch) {
    out.powerCapacity = { value: Number(powerMatch[1]), confidence: 0.85, engine: 'ocr' };
  }
  const voltageMatch = text.match(/Voltage(?:\s+at\s+60\s*Hz)?\s*[:.]?\s*([0-9]+)\s*V/i);
  if (voltageMatch) {
    out.voltage = { value: Number(voltageMatch[1]), confidence: 0.85, engine: 'ocr' };
  }

  // "11,341" → 11341.
  const usd = (digits: string): number => Number(digits.replace(/,/g, ''));

  // Pricing: "WEEK HIGH / LOW: 11,341 USD / 8,100 USD"
  const weekMatch = text.match(
    /WEEK\s+HIGH\s*\/\s*LOW[:.\s]*([0-9,]+)\s*USD\s*\/\s*([0-9,]+)\s*USD/i,
  );
  if (weekMatch) {
    out.weeklyRateHighUsd = { value: usd(weekMatch[1]!), confidence: 0.8, engine: 'ocr' };
    out.weeklyRateLowUsd = { value: usd(weekMatch[2]!), confidence: 0.8, engine: 'ocr' };
  }
  const dayMatch = text.match(
    /DAY\s+HIGH\s*\/\s*LOW[:.\s]*([0-9,]+)\s*USD\s*\/\s*([0-9,]+)\s*USD/i,
  );
  if (dayMatch) {
    out.dailyRateHighUsd = { value: usd(dayMatch[1]!), confidence: 0.8, engine: 'ocr' };
    out.dailyRateLowUsd = { value: usd(dayMatch[2]!), confidence: 0.8, engine: 'ocr' };
  }

  // Purchase price: "PURCHASE PRICE:\nFEE SIMPLE OR STRATA LOT\n3,880,800 USD"
  const priceMatch = text.match(/PURCHASE\s+PRICE[\s\S]{0,80}?([0-9][0-9,]+)\s*USD/i);
  if (priceMatch) {
    out.price = { value: usd(priceMatch[1]!), confidence: 0.7, engine: 'ocr' };
  }

  // Pricing validity: "ALL PRICES ABOVE ARE CONFIRMED THROUGH UNTIL SEPTEMBER 15TH, 2025"
  const validityMatch = text.match(
    /CONFIRMED\s+THROUGH\s+UNTIL\s+([A-Za-z]+\s+[0-9]{1,2})(?:[A-Z]{2})?,?\s+([0-9]{4})/i,
  );
  if (validityMatch) {
    const iso = parseHumanDate(`${validityMatch[1]} ${validityMatch[2]}`);
    if (iso) {
      out.pricingValidUntil = { value: iso, confidence: 0.75, engine: 'ocr' };
    } else {
      warnings.push(
        'Could not normalize "CONFIRMED THROUGH UNTIL" date; pricing_valid_until skipped.',
      );
    }
  }

  return { fields: out, warnings };
}
|
||||
|
||||
/**
 * Tier 2: run OCR over the buffer and extract fields with regex heuristics.
 *
 * @param buffer  Raw PDF bytes handed to the adapter as-is.
 * @param adapter Optional OCR adapter; the default adapter is used when omitted.
 * @returns A `ParseResult` with `engine: 'ocr'`. When the adapter yields no
 *   (or only whitespace) text, the result has no fields, `meanConfidence` 0 and
 *   an "OCR produced no text." warning. Errors thrown by the adapter propagate.
 */
async function tryOcr(buffer: Buffer, adapter?: OcrAdapter): Promise<ParseResult | null> {
  const ocr = adapter ?? (await defaultOcrAdapter());
  const result = await ocr.recognize(buffer);

  const rawText = result.text ?? '';
  if (rawText.trim().length === 0) {
    return {
      engine: 'ocr',
      fields: {},
      meanConfidence: 0,
      rawText,
      warnings: ['OCR produced no text.'],
    };
  }

  const { fields, warnings } = extractFromOcrText(rawText);

  // The adapter reports 0..100; normalize to 0..1. A missing or non-finite
  // score is treated as 0 so NaN can never leak into per-field confidences or
  // the mean (NaN would make every later threshold comparison false).
  const engineConfidence = Number.isFinite(result.confidence)
    ? Math.max(0, Math.min(result.confidence, 100)) / 100
    : 0;
  // Per-field confidence comes from the regex tier; the engine score acts as a
  // CEILING on it, and that ceiling itself is never lower than 0.5.
  const ceiling = Math.max(engineConfidence, 0.5);

  let confidenceSum = 0;
  let fieldCount = 0;
  for (const key of Object.keys(fields) as Array<keyof ExtractedBerthFields>) {
    const field = fields[key];
    if (!field) continue;
    field.confidence = Math.min(field.confidence, ceiling);
    confidenceSum += field.confidence;
    fieldCount += 1;
  }
  const meanConfidence = fieldCount === 0 ? 0 : confidenceSum / fieldCount;

  return {
    engine: 'ocr',
    fields,
    meanConfidence,
    rawText,
    warnings,
  };
}
|
||||
|
||||
// ─── tier 3: AI fallback ─────────────────────────────────────────────────────
|
||||
|
||||
/** Mean OCR confidence below which the diff dialog recommends the AI tier. */
export const OCR_LOW_CONFIDENCE_THRESHOLD = 0.55;

/**
 * Decides whether the rep should be offered an "AI parse" button.
 *
 * @param parse Result of a previous parser tier.
 * @returns `false` for any non-OCR result; `true` for an OCR result that
 *   extracted nothing, or whose mean confidence is below
 *   `OCR_LOW_CONFIDENCE_THRESHOLD` or is not a comparable number (NaN).
 */
export function shouldOfferAiTier(parse: ParseResult): boolean {
  if (parse.engine !== 'ocr') return false;
  if (Object.keys(parse.fields).length === 0) return true;
  // Written as a negated `>=` so an unknown (NaN) confidence counts as low
  // rather than silently hiding the AI option.
  return !(parse.meanConfidence >= OCR_LOW_CONFIDENCE_THRESHOLD);
}
|
||||
|
||||
// ─── public entry point ──────────────────────────────────────────────────────
|
||||
|
||||
/** Optional knobs for `parseBerthPdf`; both exist mainly for tests. */
export interface ParseBerthPdfOptions {
  /** Replacement for the Tesseract-backed adapter. When omitted, the default is resolved. */
  ocrAdapter?: OcrAdapter;
  /** When true, the OCR tier is not run and only AcroForm extraction is attempted. */
  skipOcr?: boolean;
}
|
||||
|
||||
/**
 * Entry point for parsing a per-berth PDF buffer.
 *
 * Tiers run in order: AcroForm first; if it yields at least one field that
 * result is returned, otherwise OCR runs (unless `opts.skipOcr` is set). The
 * `engine` on the returned result tells callers which tier produced the
 * fields (the reconcile-diff dialog uses it to colour confidence chips).
 *
 * The AI tier is never started from here — it is a separate, deliberate action
 * from the diff dialog so OPENAI_API_KEY isn't spent on every upload.
 *
 * @throws Error when the buffer does not start with the `%PDF-` signature.
 */
export async function parseBerthPdf(
  buffer: Buffer,
  opts: ParseBerthPdfOptions = {},
): Promise<ParseResult> {
  if (!isPdfMagic(buffer)) {
    throw new Error('PDF magic-byte check failed: file does not begin with %PDF-');
  }

  // Shape shared by both "nothing extracted" outcomes below.
  const emptyOcrResult = (warning: string): ParseResult => ({
    engine: 'ocr',
    fields: {},
    meanConfidence: 0,
    warnings: [warning],
  });

  const acroResult = await tryAcroForm(buffer);
  if (acroResult !== null && Object.keys(acroResult.fields).length > 0) {
    return acroResult;
  }

  const { skipOcr, ocrAdapter } = opts;
  if (skipOcr) {
    return emptyOcrResult('skipOcr=true; no AcroForm fields found.');
  }

  const ocrResult = await tryOcr(buffer, ocrAdapter);
  return ocrResult ?? emptyOcrResult('OCR adapter returned null.');
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Coerce a raw AcroForm string into the scalar expected by the target column.
 *
 * @param key Target field on `ExtractedBerthFields`.
 * @param raw Trimmed, non-empty text read from the form field.
 * @returns For `pricingValidUntil`, an ISO `YYYY-MM-DD` string (or `null` when
 *   the date cannot be normalized); for other string fields, `raw` unchanged;
 *   for numeric fields, the parsed number, or `null` when `raw` contains no
 *   usable number (e.g. "N/A", "TBD", "12-14").
 */
function coerceFieldValue(key: keyof ExtractedBerthFields, raw: string): string | number | null {
  if (key === 'pricingValidUntil') {
    // Accept ISO YYYY-MM-DD as-is; otherwise try a humane parse.
    return /^\d{4}-\d{2}-\d{2}$/.test(raw) ? raw : parseHumanDate(raw);
  }

  // Free-text columns pass through untouched.
  const stringKeys: Array<keyof ExtractedBerthFields> = [
    'mooringNumber',
    'bowFacing',
    'sidePontoon',
    'mooringType',
    'cleatType',
    'cleatCapacity',
    'bollardType',
    'bollardCapacity',
    'access',
  ];
  if (stringKeys.includes(key)) return raw;

  // Numeric columns: strip currency / unit suffixes and thousands separators.
  const digits = raw.replace(/[^0-9.\-]/g, '');
  // `Number('')` is 0, so text without any digits ("N/A") must be rejected
  // explicitly instead of being stored as a confident zero.
  if (digits.length === 0) return null;
  const numeric = Number(digits);
  return Number.isFinite(numeric) ? numeric : null;
}
|
||||
|
||||
/**
 * Parse a human date like "September 15 2025" or "September 15th, 2025" into
 * an ISO calendar date ("2025-09-15").
 *
 * The common `<Month> <day>[,] <year>` shape is parsed by hand so the result
 * does not depend on engine-specific `Date` string parsing and impossible
 * dates (e.g. "February 30 2025") are rejected instead of rolling over into
 * the next month. Any other shape falls back to `Date` parsing, interpreted
 * as UTC.
 *
 * @param raw Free-form date text.
 * @returns `YYYY-MM-DD`, or `null` when the text is not a valid date.
 */
export function parseHumanDate(raw: string): string | null {
  // Drop an ordinal suffix: "15th" → "15".
  const cleaned = raw.replace(/(\d+)(st|nd|rd|th)/i, '$1').trim();

  const named = cleaned.match(/^([A-Za-z]{3,9})\.?\s+(\d{1,2}),?\s+(\d{4})$/);
  if (named) {
    const monthNames = [
      'january',
      'february',
      'march',
      'april',
      'may',
      'june',
      'july',
      'august',
      'september',
      'october',
      'november',
      'december',
    ];
    const token = named[1]!.toLowerCase();
    // Accept full names and abbreviations of 3+ letters ("Sep", "Sept").
    const monthIndex = monthNames.findIndex((name) => name.startsWith(token));
    if (monthIndex >= 0) {
      const day = Number(named[2]);
      const year = Number(named[3]);
      const utc = new Date(Date.UTC(year, monthIndex, day));
      // Date.UTC normalizes out-of-range days; a round-trip mismatch means
      // the input named a day that does not exist.
      const roundTrips =
        utc.getUTCFullYear() === year &&
        utc.getUTCMonth() === monthIndex &&
        utc.getUTCDate() === day;
      return roundTrips ? utc.toISOString().slice(0, 10) : null;
    }
  }

  // Fallback for other shapes. The " UTC" suffix forces UTC interpretation;
  // without it, zone-less strings are read in the runner's local time zone and
  // `toISOString()` can shift the day by ±1.
  const d = new Date(cleaned + ' UTC');
  if (Number.isNaN(d.getTime())) return null;
  return d.toISOString().slice(0, 10);
}
|
||||
|
||||
/**
 * Convert a feet/inches string to decimal feet.
 *
 *   `206' 8"`   → 206.667
 *   `10' 6.5"`  → 10.542   (decimal inches)
 *   `82`, `82.5` → 82, 82.5
 *
 * Typographic primes and quotes (’ ′ ” ″), which OCR commonly emits, and a
 * doubled apostrophe used as an inch mark are accepted as equivalents of the
 * ASCII `'` and `"`.
 *
 * @param raw Text such as the imperial half of a "206' 8\" / 63m" pair.
 * @returns Decimal feet, or `null` when the text does not start with a number.
 */
export function parseFeetInches(raw: string): number | null {
  const normalized = raw
    .trim()
    .replace(/[\u2018\u2019\u2032]/g, "'")
    .replace(/[\u201c\u201d\u2033]/g, '"');

  const feetAndInches = normalized.match(/^([0-9]+)\s*'\s*([0-9]+(?:\.[0-9]+)?)\s*(?:"|'')$/);
  if (feetAndInches) {
    return Number(feetAndInches[1]) + Number(feetAndInches[2]) / 12;
  }

  // Plain (optionally decimal) feet; anything after the number is ignored.
  const feetOnly = normalized.match(/^([0-9]+(?:\.[0-9]+)?)/);
  return feetOnly ? Number(feetOnly[1]) : null;
}
|
||||
537
src/lib/services/berth-pdf.service.ts
Normal file
537
src/lib/services/berth-pdf.service.ts
Normal file
@@ -0,0 +1,537 @@
|
||||
/**
|
||||
* Berth PDF management service (Phase 6b — see plan §4.7b, §11.1, §14.6).
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Upload a per-berth PDF (versioned), via the active `StorageBackend`.
|
||||
* - Verify the magic bytes (`%PDF-`) before persisting; delete the storage
|
||||
* object on mismatch (§14.6 critical).
|
||||
* - Reconcile the parsed fields against the current berth row, surfacing
|
||||
* conflicts for the rep's diff dialog and auto-applying nullable gaps.
|
||||
* - Enforce per-port size cap from `system_settings.berth_pdf_max_upload_mb`.
|
||||
* - Generate signed download URLs for the version list.
|
||||
*/
|
||||
|
||||
import { and, desc, eq, isNull, max } from 'drizzle-orm';
|
||||
|
||||
import { db } from '@/lib/db';
|
||||
import { berths, berthPdfVersions } from '@/lib/db/schema/berths';
|
||||
import { systemSettings } from '@/lib/db/schema/system';
|
||||
import { ConflictError, NotFoundError, ValidationError } from '@/lib/errors';
|
||||
import { logger } from '@/lib/logger';
|
||||
import { getStorageBackend } from '@/lib/storage';
|
||||
|
||||
import {
|
||||
type ExtractedBerthFields,
|
||||
type ParseResult,
|
||||
type ParserEngine,
|
||||
isPdfMagic,
|
||||
} from './berth-pdf-parser';
|
||||
|
||||
// ─── shared types ────────────────────────────────────────────────────────────
|
||||
|
||||
/** One field on which the CRM row and the parsed PDF hold different values. */
export interface ReconcileConflict {
  field: keyof ExtractedBerthFields;
  /** Value currently stored in the CRM. */
  crmValue: string | number | null;
  /** Value the parser read from the PDF. */
  pdfValue: string | number | null;
  /** Parser confidence for `pdfValue`, in 0..1. */
  pdfConfidence: number;
}

/** Outcome of comparing a parse result with the current berth row. */
export interface ReconcileResult {
  /**
   * Fields that were null in the CRM and for which the PDF supplied a value.
   * These may be applied automatically; the rep still sees them as
   * "Auto-applied" chips.
   */
  autoApplied: Array<{ field: keyof ExtractedBerthFields; value: string | number }>;
  /**
   * Fields where CRM and PDF disagree on a non-null value. The diff dialog
   * shows them side by side and nothing is written until the rep confirms
   * through the apply endpoint.
   */
  conflicts: ReconcileConflict[];
  /**
   * Warning-only notes, e.g. the PDF's mooring number not matching the berth
   * it is being uploaded to (§14.6).
   */
  warnings: string[];
  /** Parser tier behind the values; shown in the diff UI. */
  engine: ParserEngine;
}
|
||||
|
||||
// Allowlist of parsed fields that reconcile/apply may touch. Each entry
// mirrors a `berths` column; anything a parser tier emits outside this list
// is never written, so a rogue tier can't poison the schema. Note that
// `mooringNumber` is deliberately not part of the list.
const APPLIABLE_FIELDS: ReadonlyArray<keyof ExtractedBerthFields> = [
  // dimensions
  'lengthFt',
  'lengthM',
  'widthFt',
  'widthM',
  'draftFt',
  'draftM',
  'waterDepth',
  'waterDepthM',
  // orientation and utilities
  'bowFacing',
  'sidePontoon',
  'powerCapacity',
  'voltage',
  // hardware and access
  'mooringType',
  'cleatType',
  'cleatCapacity',
  'bollardType',
  'bollardCapacity',
  'access',
  // pricing
  'price',
  'weeklyRateHighUsd',
  'weeklyRateLowUsd',
  'dailyRateHighUsd',
  'dailyRateLowUsd',
  'pricingValidUntil',
];
|
||||
|
||||
// Fields backed by `numeric` columns on `berths` (which Drizzle hands back as
// strings). The apply path consults this set to know which values need
// stringification.
const NUMERIC_FIELDS = new Set<keyof ExtractedBerthFields>([
  // dimensions
  'lengthFt',
  'lengthM',
  'widthFt',
  'widthM',
  'draftFt',
  'draftM',
  'waterDepth',
  'waterDepthM',
  // utilities
  'powerCapacity',
  'voltage',
  // pricing
  'price',
  'weeklyRateHighUsd',
  'weeklyRateLowUsd',
  'dailyRateHighUsd',
  'dailyRateLowUsd',
]);

// Relative tolerance (1%) used when reconciling imperial against metric
// values — the same threshold the parser uses for its drift warning.
const IMPERIAL_METRIC_TOLERANCE = 0.01;
|
||||
|
||||
// ─── settings helpers ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Resolve the `berth_pdf_max_upload_mb` setting.
 *
 * Lookup order: the row scoped to `portId`, then the global row (null port),
 * then the built-in default of 15. A row is used only when its value is a
 * finite, non-negative number or a non-blank string that parses to one;
 * anything else (blank string, NaN, negative, other types) is ignored and
 * the lookup falls through to the next source.
 *
 * @param portId Port whose override should be consulted first.
 * @returns The upload cap in megabytes.
 */
export async function getMaxUploadMb(portId: string): Promise<number> {
  const KEY = 'berth_pdf_max_upload_mb';
  const DEFAULT_MB = 15;

  // Shared coercion for both rows. Blank strings are rejected explicitly
  // because `Number('')` is 0, which would otherwise become a 0 MB cap.
  const toMegabytes = (value: unknown): number | null => {
    let candidate: number;
    if (typeof value === 'number') {
      candidate = value;
    } else if (typeof value === 'string' && value.trim().length > 0) {
      candidate = Number(value);
    } else {
      return null;
    }
    return Number.isFinite(candidate) && candidate >= 0 ? candidate : null;
  };

  const [portRow] = await db
    .select()
    .from(systemSettings)
    .where(and(eq(systemSettings.key, KEY), eq(systemSettings.portId, portId)));
  const portMb = toMegabytes(portRow?.value);
  if (portMb !== null) return portMb;

  const [globalRow] = await db
    .select()
    .from(systemSettings)
    .where(and(eq(systemSettings.key, KEY), isNull(systemSettings.portId)));
  return toMegabytes(globalRow?.value) ?? DEFAULT_MB;
}
|
||||
|
||||
// ─── upload + version management ─────────────────────────────────────────────
|
||||
|
||||
/** Input for `uploadBerthPdf`. */
export interface UploadBerthPdfArgs {
  /** Berth the PDF belongs to. */
  berthId: string;
  /**
   * Key of an object that is already in storage (generated by the upload-url
   * endpoint). Leave undefined to let the service compute one.
   */
  storageKey?: string;
  /**
   * Raw file bytes when the server proxies the upload (filesystem mode).
   * Callers that used a presigned PUT pass `storageKey` and omit this.
   */
  buffer?: Buffer;
  fileName: string;
  uploadedBy: string;
  /** sha256 hex pre-computed by the client (verified server-side anyway). */
  sha256?: string;
  /** Pre-computed size in bytes, used for the size-cap pre-flight on direct uploads. */
  fileSizeBytes?: number;
  /**
   * Output of a server-side `parseBerthPdf` run. Optional — the rep may have
   * skipped parsing on a re-upload.
   */
  parseResult?: ParseResult;
}

/** What `uploadBerthPdf` reports back about the stored version. */
export interface UploadBerthPdfResult {
  versionId: string;
  storageKey: string;
  versionNumber: number;
  fileSizeBytes: number;
  contentSha256: string;
}
|
||||
|
||||
/**
 * Persist a new PDF version for a berth and make it the berth's current one.
 *
 * Two upload modes are supported:
 * - `buffer` present: the bytes are validated here and written through the
 *   storage backend, to `args.storageKey` when supplied and otherwise to a
 *   derived `berths/{id}/v{n}/{file}` key.
 * - only `storageKey` present: the object was uploaded out-of-band (presigned
 *   PUT) and is verified through the backend's `head()`.
 *
 * Critical mitigations enforced here:
 *   - §14.6 magic-byte check against the buffer when present.
 *   - §14.6 size cap from `berth_pdf_max_upload_mb`.
 *   - A rejected upload that already sits at a caller-supplied `storageKey` is
 *     deleted on a best-effort basis (delete failures are ignored).
 *
 * @param args - see {@link UploadBerthPdfArgs}.
 * @returns identifiers, size and hash of the stored version.
 * @throws NotFoundError when the berth does not exist.
 * @throws ValidationError when the file is empty, is not a PDF, exceeds the
 *   size cap, is missing from storage, or neither `buffer` nor `storageKey`
 *   was supplied.
 */
export async function uploadBerthPdf(args: UploadBerthPdfArgs): Promise<UploadBerthPdfResult> {
  // 1. Resolve the berth + port for the size-cap lookup.
  const berthRow = await db.query.berths.findFirst({ where: eq(berths.id, args.berthId) });
  if (!berthRow) throw new NotFoundError('Berth');
  const maxMb = await getMaxUploadMb(berthRow.portId);
  const maxBytes = maxMb * 1024 * 1024;

  // 2. Compute the next version number.
  //    NOTE(review): this is a plain read that happens outside the insert
  //    transaction below, so two concurrent uploads can still pick the same
  //    number. Per the original design notes the unique index on the version
  //    number is what rejects the loser at insert time — confirm callers map
  //    that failure to a clean error.
  const versionNumber = await nextVersionNumber(args.berthId);

  const backend = await getStorageBackend();
  const { buffer } = args;

  // Best-effort removal of an object the caller already placed in storage.
  const discard = async (key: string | undefined): Promise<void> => {
    if (key) await backend.delete(key).catch(() => undefined);
  };

  let storageKey: string;
  let sizeBytes: number;
  let sha256: string;

  if (buffer) {
    // 3a. Validate the bytes we hold. The empty check runs first: a 0-byte
    //     buffer can never pass the magic-byte check, so testing magic bytes
    //     first would make the more specific "empty" error unreachable.
    let problem: string | undefined;
    if (buffer.length === 0) {
      problem = 'Uploaded PDF is empty (0 bytes).';
    } else if (!isPdfMagic(buffer)) {
      problem = 'Uploaded file failed PDF magic-byte check (does not start with %PDF-).';
    } else if (buffer.length > maxBytes) {
      problem = `PDF exceeds ${maxMb} MB upload cap (got ${(buffer.length / 1024 / 1024).toFixed(1)} MB).`;
    }
    if (problem !== undefined) {
      // Same cleanup policy as the presigned branch below.
      await discard(args.storageKey);
      throw new ValidationError(problem);
    }

    const targetKey =
      args.storageKey ??
      `berths/${args.berthId}/v${versionNumber}/${sanitizeFileName(args.fileName)}`;
    const written = await backend.put(targetKey, buffer, { contentType: 'application/pdf' });
    // Trust what the backend reports over anything the client claimed.
    storageKey = written.key;
    sizeBytes = written.sizeBytes;
    sha256 = written.sha256;
  } else if (args.storageKey) {
    // 3b. Browser uploaded directly via presigned URL — verify via HEAD.
    const head = await backend.head(args.storageKey);
    if (!head) {
      throw new ValidationError('Uploaded object not found at expected storage key.');
    }
    if (head.sizeBytes === 0) {
      await discard(args.storageKey);
      throw new ValidationError('Uploaded PDF is empty (0 bytes).');
    }
    if (head.sizeBytes > maxBytes) {
      await discard(args.storageKey);
      throw new ValidationError(
        `PDF exceeds ${maxMb} MB upload cap (got ${(head.sizeBytes / 1024 / 1024).toFixed(1)} MB).`,
      );
    }
    if (head.contentType !== 'application/pdf' && head.contentType !== 'application/octet-stream') {
      await discard(args.storageKey);
      throw new ValidationError(
        `Uploaded object content-type is ${head.contentType}; expected application/pdf.`,
      );
    }
    sizeBytes = head.sizeBytes;
    // NOTE(review): the bytes are not read back in this mode, so the hash is
    // whatever the client supplied (or '') and no magic-byte check runs.
    sha256 = args.sha256 ?? '';
    storageKey = args.storageKey;
  } else {
    throw new ValidationError('Either buffer or storageKey is required.');
  }

  // 4. Insert version row + bump current pointer in one transaction.
  const versionId = crypto.randomUUID();
  await db.transaction(async (tx) => {
    await tx.insert(berthPdfVersions).values({
      id: versionId,
      berthId: args.berthId,
      versionNumber,
      storageKey,
      fileName: args.fileName,
      fileSizeBytes: sizeBytes,
      contentSha256: sha256,
      uploadedBy: args.uploadedBy,
      parseResults: args.parseResult ? serializeParseResult(args.parseResult) : null,
    });
    await tx
      .update(berths)
      .set({ currentPdfVersionId: versionId, updatedAt: new Date() })
      .where(eq(berths.id, args.berthId));
  });

  logger.info(
    { berthId: args.berthId, versionId, versionNumber, storageKey, sizeBytes },
    'Berth PDF version saved',
  );

  return { versionId, storageKey, versionNumber, fileSizeBytes: sizeBytes, contentSha256: sha256 };
}
|
||||
|
||||
/**
 * Return the version number the next upload for `berthId` should use: one
 * more than the highest stored `versionNumber`, or 1 when the berth has no
 * versions yet.
 */
async function nextVersionNumber(berthId: string): Promise<number> {
  const aggregate = await db
    .select({ max: max(berthPdfVersions.versionNumber) })
    .from(berthPdfVersions)
    .where(eq(berthPdfVersions.berthId, berthId));

  // The aggregate is null/absent when no version rows match.
  const highest = aggregate[0]?.max ?? 0;
  return highest + 1;
}
|
||||
|
||||
/**
 * Turn a client-supplied file name into a safe final segment for a storage key.
 *
 * - Any directory part (either slash style) is dropped.
 * - Every character outside `[a-zA-Z0-9._-]` becomes `_`.
 * - The result is capped at 200 characters; when truncation is needed a short
 *   extension is kept so the key still ends in e.g. `.pdf`.
 * - Empty results and dot-only names (`.`, `..`) fall back to `berth.pdf`, so
 *   the segment can never be read as a relative path component.
 *
 * @param raw - file name as received from the caller.
 * @returns a non-empty name of at most 200 characters.
 */
function sanitizeFileName(raw: string): string {
  const maxLength = 200;
  const maxExtensionLength = 16;
  const fallback = 'berth.pdf';

  const baseName = raw.split(/[\\/]/).pop() ?? raw;
  const cleaned = baseName.replace(/[^a-zA-Z0-9._-]/g, '_');

  if (cleaned === '' || /^\.+$/.test(cleaned)) return fallback;
  if (cleaned.length <= maxLength) return cleaned;

  // Truncate the stem rather than the tail so the extension survives.
  const dotIndex = cleaned.lastIndexOf('.');
  const extension = dotIndex > 0 ? cleaned.slice(dotIndex) : '';
  if (extension.length > 1 && extension.length <= maxExtensionLength) {
    return cleaned.slice(0, maxLength - extension.length) + extension;
  }
  return cleaned.slice(0, maxLength);
}
|
||||
|
||||
/**
 * Reduce a `ParseResult` to the plain object stored in the version row's
 * `parseResults` column: the engine, each field's `{ value, confidence }`
 * (or null when the field is absent), the mean confidence and the warnings.
 */
function serializeParseResult(parse: ParseResult): Record<string, unknown> {
  const pairs: Array<[string, unknown]> = [];
  for (const [name, field] of Object.entries(parse.fields)) {
    const slim = field ? { value: field.value, confidence: field.confidence } : null;
    pairs.push([name, slim]);
  }

  return {
    engine: parse.engine,
    extracted: Object.fromEntries(pairs),
    meanConfidence: parse.meanConfidence,
    warnings: parse.warnings,
  };
}
|
||||
|
||||
// ─── reconcile + apply ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Compare the fields parsed from a PDF with the berth row and classify each
 * one in `APPLIABLE_FIELDS`:
 *   - `autoApplied` when the CRM column is null/empty.
 *   - `conflicts` when both sides have a value and they disagree.
 *
 * Numeric tolerance: ±1% (matches §14.6 imperial-vs-metric guidance, applied
 * uniformly across all numeric columns since the same rounding noise affects
 * weekly/daily rates too).
 *
 * Parsed values that normalise to nothing usable are not classified: blank
 * text is treated like an absent field, and a value that cannot be
 * interpreted is reported in `warnings`. `autoApplied` carries the normalised
 * value, the same form `conflicts` reports, so that a later
 * `applyParseResults` call does not silently drop it.
 *
 * @param berthId - berth to reconcile against.
 * @param parsed - parser output; its warnings are copied into the result.
 * @returns the diff plus accumulated warnings and the parser engine.
 * @throws NotFoundError when the berth does not exist.
 */
export async function reconcilePdfWithBerth(
  berthId: string,
  parsed: ParseResult,
): Promise<ReconcileResult> {
  const berthRow = await db.query.berths.findFirst({ where: eq(berths.id, berthId) });
  if (!berthRow) throw new NotFoundError('Berth');
  const fields = parsed.fields;

  const autoApplied: ReconcileResult['autoApplied'] = [];
  const conflicts: ReconcileConflict[] = [];
  const warnings: string[] = [...parsed.warnings];

  // §14.6 — mooring-number mismatch warning. Compared case-insensitively and
  // ignoring surrounding whitespace so OCR padding alone does not trigger it.
  const pdfMooring = fields.mooringNumber?.value;
  if (typeof pdfMooring === 'string') {
    const pdfLabel = pdfMooring.trim();
    if (
      pdfLabel !== '' &&
      pdfLabel.toUpperCase() !== berthRow.mooringNumber.trim().toUpperCase()
    ) {
      warnings.push(
        `PDF says berth ${pdfLabel} but uploading to ${berthRow.mooringNumber}. Confirm before applying.`,
      );
    }
  }

  for (const key of APPLIABLE_FIELDS) {
    const parsedField = fields[key];
    if (!parsedField || parsedField.value == null) continue;

    const crmRaw = (berthRow as Record<string, unknown>)[key];
    const crmValue = normalizeForCompare(key, crmRaw);
    const pdfValue = normalizeForCompare(key, parsedField.value);

    // Blank text carries no information — same as the field being absent.
    if (pdfValue === '') continue;
    if (pdfValue == null) {
      warnings.push(`PDF value for ${String(key)} could not be interpreted and was ignored.`);
      continue;
    }

    if (crmValue == null || crmValue === '') {
      autoApplied.push({ field: key, value: pdfValue });
      continue;
    }
    if (!valuesEqual(crmValue, pdfValue, NUMERIC_FIELDS.has(key))) {
      conflicts.push({
        field: key,
        crmValue,
        pdfValue,
        pdfConfidence: parsedField.confidence,
      });
    }
  }

  return { autoApplied, conflicts, warnings, engine: parsed.engine };
}
|
||||
|
||||
/**
 * Apply a rep-confirmed slice of the reconcile diff to the berth row and
 * stamp the applied field names onto the PDF version's `parseResults`.
 *
 * The caller passes the canonical `ExtractedBerthFields` keys; anything
 * outside `APPLIABLE_FIELDS` is silently dropped to keep this endpoint a hard
 * allowlist. Per field:
 *   - `undefined` → untouched.
 *   - `null` → column is cleared.
 *   - numeric columns → only finite numbers (or strings that parse to one) are
 *     written; blank strings and other value types are skipped rather than
 *     coerced, because `Number('')`, `Number(true)` and `Number([])` would
 *     otherwise store 0 or 1.
 *   - other columns → stored as `String(value)`.
 *
 * @param berthId - berth to update.
 * @param versionId - PDF version the values came from; must belong to the berth.
 * @param fieldsToApply - values the rep confirmed.
 * @returns the keys that were actually written.
 * @throws NotFoundError when the berth or the version (for that berth) is missing.
 * @throws ValidationError when no supplied field ends up being written.
 */
export async function applyParseResults(
  berthId: string,
  versionId: string,
  fieldsToApply: Partial<ExtractedBerthFields>,
): Promise<{ updatedFields: Array<keyof ExtractedBerthFields> }> {
  const berthRow = await db.query.berths.findFirst({ where: eq(berths.id, berthId) });
  if (!berthRow) throw new NotFoundError('Berth');
  const versionRow = await db.query.berthPdfVersions.findFirst({
    where: and(eq(berthPdfVersions.id, versionId), eq(berthPdfVersions.berthId, berthId)),
  });
  if (!versionRow) throw new NotFoundError('Berth PDF version');

  const update: Record<string, unknown> = {};
  const applied: Array<keyof ExtractedBerthFields> = [];
  for (const key of APPLIABLE_FIELDS) {
    // Treated as unknown: the payload originates from a request body.
    const value: unknown = fieldsToApply[key];
    if (value === undefined) continue;
    if (value === null) {
      update[key] = null;
      applied.push(key);
      continue;
    }
    if (NUMERIC_FIELDS.has(key)) {
      let n: number;
      if (typeof value === 'number') {
        n = value;
      } else if (typeof value === 'string' && value.trim() !== '') {
        n = Number(value);
      } else {
        // Blank string or a non-numeric type: nothing sensible to store.
        continue;
      }
      if (!Number.isFinite(n)) continue;
      // numeric columns expect strings to preserve precision.
      update[key] = String(n);
    } else {
      update[key] = String(value);
    }
    applied.push(key);
  }
  if (applied.length === 0) {
    throw new ValidationError('No appliable fields supplied.');
  }
  update.updatedAt = new Date();

  await db.transaction(async (tx) => {
    await tx.update(berths).set(update).where(eq(berths.id, berthId));
    // Stamp the applied-field set onto parse_results for audit. `prior` is the
    // snapshot read before the transaction started.
    const prior = (versionRow.parseResults as Record<string, unknown> | null) ?? {};
    await tx
      .update(berthPdfVersions)
      .set({
        parseResults: {
          ...prior,
          appliedFields: applied,
          appliedAt: new Date().toISOString(),
        },
      })
      .where(eq(berthPdfVersions.id, versionId));
  });

  return { updatedFields: applied };
}
|
||||
|
||||
// ─── version listing + rollback ──────────────────────────────────────────────
|
||||
|
||||
/** One entry of the version history returned by `listBerthPdfVersions`. */
export interface BerthPdfVersionListItem {
  /** Id of the PDF version row. */
  id: string;
  /** Per-berth version number of this upload. */
  versionNumber: number;
  /** File name stored with the version. */
  fileName: string;
  /** Stored object size in bytes. */
  fileSizeBytes: number;
  /** Value recorded as the uploader of this version. */
  uploadedBy: string;
  /** Upload timestamp from the version row. */
  uploadedAt: Date;
  /** True when the berth's current-version pointer references this row. */
  isCurrent: boolean;
  /** Pre-signed download URL (requested with a 15-minute expiry). */
  downloadUrl: string;
  /** Expiry reported by the storage backend for `downloadUrl`. */
  downloadUrlExpiresAt: Date;
  /** Parser engine read from the stored parse results; null when none is recorded. */
  parseEngine: ParserEngine | null;
}
|
||||
|
||||
/**
 * List every stored PDF version of a berth, newest version number first, each
 * with a freshly presigned download URL.
 *
 * The presign calls are independent of one another, so they are issued
 * together through `Promise.all` instead of one at a time; the output order
 * still follows the query order.
 *
 * @param berthId - berth whose versions are listed.
 * @returns list items ordered by descending `versionNumber` (empty when the
 *   berth has no PDF yet).
 * @throws NotFoundError when the berth does not exist.
 */
export async function listBerthPdfVersions(berthId: string): Promise<BerthPdfVersionListItem[]> {
  // 15 minutes, matching the TTL documented on `BerthPdfVersionListItem.downloadUrl`.
  const downloadUrlTtlSeconds = 900;

  const berthRow = await db.query.berths.findFirst({ where: eq(berths.id, berthId) });
  if (!berthRow) throw new NotFoundError('Berth');

  const rows = await db
    .select()
    .from(berthPdfVersions)
    .where(eq(berthPdfVersions.berthId, berthId))
    .orderBy(desc(berthPdfVersions.versionNumber));

  const backend = await getStorageBackend();
  return Promise.all(
    rows.map(async (row): Promise<BerthPdfVersionListItem> => {
      const presigned = await backend.presignDownload(row.storageKey, {
        expirySeconds: downloadUrlTtlSeconds,
        filename: row.fileName,
        contentType: 'application/pdf',
      });
      // parse_results is free-form JSON; only the engine tag is surfaced here.
      const parseEngine = (row.parseResults as { engine?: ParserEngine } | null)?.engine ?? null;
      return {
        id: row.id,
        versionNumber: row.versionNumber,
        fileName: row.fileName,
        fileSizeBytes: row.fileSizeBytes,
        uploadedBy: row.uploadedBy,
        uploadedAt: row.uploadedAt,
        isCurrent: berthRow.currentPdfVersionId === row.id,
        downloadUrl: presigned.url,
        downloadUrlExpiresAt: presigned.expiresAt,
        parseEngine,
      };
    }),
  );
}
|
||||
|
||||
/**
 * Point `berths.current_pdf_version_id` at an earlier (or otherwise
 * non-current) version of the same berth.
 *
 * Per §14.6 this only moves the pointer: the berth's data columns are NOT
 * re-parsed or rewritten — extracting data from a version is a separate,
 * deliberate action.
 *
 * @throws NotFoundError when the version does not exist for this berth, or
 *   the berth itself is missing.
 * @throws ConflictError when the requested version is already current.
 */
export async function rollbackToVersion(
  berthId: string,
  versionId: string,
): Promise<{ versionId: string; versionNumber: number }> {
  // The version must exist AND belong to this berth.
  const belongsToBerth = and(
    eq(berthPdfVersions.id, versionId),
    eq(berthPdfVersions.berthId, berthId),
  );
  const target = await db.query.berthPdfVersions.findFirst({ where: belongsToBerth });
  if (!target) throw new NotFoundError('Berth PDF version');

  const berth = await db.query.berths.findFirst({ where: eq(berths.id, berthId) });
  if (!berth) throw new NotFoundError('Berth');

  const alreadyCurrent = berth.currentPdfVersionId === versionId;
  if (alreadyCurrent) {
    throw new ConflictError('That version is already current; rollback is a no-op.');
  }

  const pointerPatch = { currentPdfVersionId: versionId, updatedAt: new Date() };
  await db.update(berths).set(pointerPatch).where(eq(berths.id, berthId));

  return { versionId, versionNumber: target.versionNumber };
}
|
||||
|
||||
// ─── compare helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Bring a CRM or PDF value into a comparable form.
 *
 * - null/undefined → null.
 * - Fields in `NUMERIC_FIELDS` → a finite number. Strings are first stripped
 *   of everything except digits, `.` and `-` (so units and thousands
 *   separators drop out). A string with nothing numeric left, or one that
 *   still does not parse, yields null.
 * - Everything else → a string (trimmed when the input already is one).
 *
 * @param key - field being normalised; decides numeric vs. text handling.
 * @param raw - value from the berth row or from the parser.
 */
function normalizeForCompare(
  key: keyof ExtractedBerthFields,
  raw: unknown,
): string | number | null {
  if (raw == null) return null;

  if (NUMERIC_FIELDS.has(key)) {
    if (typeof raw === 'number') return Number.isFinite(raw) ? raw : null;

    const numericText = String(raw).replace(/[^0-9.\-]/g, '');
    // Number('') is 0, so text such as "TBD" or "" must be caught here or it
    // would be compared (and reported) as a genuine zero.
    if (numericText === '') return null;

    const n = Number(numericText);
    return Number.isFinite(n) ? n : null;
  }

  return typeof raw === 'string' ? raw.trim() : String(raw);
}
|
||||
|
||||
/**
 * Decide whether a CRM value (`a`) and a PDF value (`b`) count as the same.
 *
 * - Two missing values are equal; one missing value never equals a present one.
 * - Text: compared after trimming, case-insensitively.
 * - Numbers: equal when identical, or when their difference relative to `b`
 *   is within `IMPERIAL_METRIC_TOLERANCE`. When `b` is 0 an absolute
 *   difference below 0.0001 is required instead. Non-finite operands are
 *   never equal.
 */
function valuesEqual(a: unknown, b: unknown, isNumeric: boolean): boolean {
  const aMissing = a == null;
  const bMissing = b == null;
  if (aMissing || bMissing) return aMissing && bMissing;

  if (!isNumeric) {
    const left = String(a).trim().toLowerCase();
    const right = String(b).trim().toLowerCase();
    return left === right;
  }

  const crm = Number(a);
  const pdf = Number(b);
  if (!(Number.isFinite(crm) && Number.isFinite(pdf))) return false;
  if (crm === pdf) return true;

  const gap = Math.abs(crm - pdf);
  // A relative error against zero is undefined — fall back to an absolute bound.
  return pdf === 0 ? gap < 0.0001 : gap / Math.abs(pdf) <= IMPERIAL_METRIC_TOLERANCE;
}
|
||||
|
||||
// ─── re-exports the route layer leans on ─────────────────────────────────────
|
||||
|
||||
export { parseBerthPdf } from './berth-pdf-parser';
|
||||
export type {
|
||||
ExtractedBerthFields,
|
||||
ParsedField,
|
||||
ParseResult,
|
||||
ParserEngine,
|
||||
} from './berth-pdf-parser';
|
||||
Reference in New Issue
Block a user