feat(berths): per-berth PDF storage (versioned) + reverse parser
Phase 6b of the berth-recommender refactor (see
docs/berth-recommender-and-pdf-plan.md §3.2, §3.3, §4.7b, §11.1, §14.6).
Builds on the Phase 6a pluggable storage backend (commit 83693dd) — every
file write goes through `getStorageBackend()`; no direct minio imports.
Schema (migration 0030_berth_pdf_versions):
- new table `berth_pdf_versions` with monotonic `version_number` per
berth, `storage_key` (renamed convention from §4.7a), sha256, size,
`download_url_expires_at` cache slot for §11.1 signed-URL throttling,
and `parse_results` jsonb for the audit trail.
- new column `berths.current_pdf_version_id` (deferred from Phase 0)
with FK to `berth_pdf_versions(id)` ON DELETE SET NULL.
- relations + types exported from `schema/berths.ts`.
3-tier reverse parser (`lib/services/berth-pdf-parser.ts`):
1. AcroForm via pdf-lib — pulls named fields (`length_ft`,
`mooring_number`, etc.) at confidence 1. Sample PDF has 0 such
fields, so this is defensive coverage for future templates.
2. OCR via Tesseract.js — positional/regex heuristics keyed off the
§9.2 layout (Length/Width/Water Depth as `<imperial> / <metric>`,
`WEEK HIGH / LOW`, `CONFIRMED THROUGH UNTIL <date>`, etc.). Returns
per-field confidence + global mean; flags imperial-vs-metric drift
>1% in `warnings`.
3. AI fallback — gated via `getResolvedOcrConfig()` (existing
openai/claude provider). Surfaced from the diff dialog only when
`shouldOfferAiTier()` returns true (mean OCR confidence below
0.55 threshold), so OPENAI_API_KEY isn't burned on every upload.
Service layer (`lib/services/berth-pdf.service.ts`):
- `uploadBerthPdf()` — magic-byte check, size cap, version-number
bump + current pointer in one transaction.
- `reconcilePdfWithBerth()` — auto-applies fields where CRM is null;
flags conflicts when CRM and PDF disagree; tolerates ±1% on numeric
columns; warns on mooring-number-in-PDF mismatch (§14.6).
- `applyParseResults()` — hard allowlist of writable columns;
stamps `appliedFields` onto `parse_results` for audit.
- `rollbackToVersion()` — pointer flip only, never re-parses (§14.6).
- `listBerthPdfVersions()` — version list with 15-min signed URLs.
- `getMaxUploadMb()` — port-override → global → default 15 lookup
on `system_settings.berth_pdf_max_upload_mb`.
§14.6 critical mitigations:
- Magic-byte check (`%PDF-`) on every upload; mismatch deletes the
storage object and rejects the request.
- Size cap from `system_settings.berth_pdf_max_upload_mb` (default
15 MB); enforced in the upload-url presign AND server-side.
- 0-byte uploads rejected.
- Mooring-number mismatch surfaces as a `warnings[]` entry on the
reconcile result so the rep sees it in the diff dialog.
- Imperial vs metric ±1% tolerance in both the parser warnings and
the reconcile equality check.
- Path traversal already blocked at the storage layer (Phase 6a).
API + UI:
- `POST /api/v1/berths/[id]/pdf-upload-url` — presigned URL (S3) or
HMAC-signed proxy URL (filesystem) sized to the per-port cap.
- `POST /api/v1/berths/[id]/pdf-versions` — verifies the upload via
`backend.head()`, writes the row, bumps `current_pdf_version_id`.
- `GET /api/v1/berths/[id]/pdf-versions` — version list + signed URLs.
- `POST /api/v1/berths/[id]/pdf-versions/[versionId]/rollback`.
- `POST /api/v1/berths/[id]/pdf-versions/parse-results/apply` —
rep-confirmed diff payload.
- New "Documents" tab on the berth detail page (`berth-tabs.tsx`)
with current-PDF panel, version history, Replace PDF button, and
`<PdfReconcileDialog>` for the auto-applied + conflicts UX.
System settings:
- `berth_pdf_max_upload_mb` (default 15) — caps presigned-upload size
+ server-side validation. Resolved port-override → global → default.
Tests:
- `tests/unit/services/berth-pdf-parser.test.ts` — magic bytes,
feet-inches, human dates, full §9.2-shaped OCR text → 18 fields,
drift warning, AI-tier gate.
- `tests/unit/services/berth-pdf-acroform.test.ts` — synthetic
pdf-lib AcroForm round-trip.
- `tests/integration/berth-pdf-versions.test.ts` — upload, version-
number bump, magic-byte rejection, reconcile auto-applied vs
conflicts vs ±1% tolerance, mooring-number warning,
applyParseResults allowlist enforcement, rollback semantics.
Acceptance: `pnpm exec tsc --noEmit` clean, `pnpm exec vitest run`
green at 1103/1103.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
271
tests/integration/berth-pdf-versions.test.ts
Normal file
271
tests/integration/berth-pdf-versions.test.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
/**
|
||||
* Integration tests for the per-berth PDF service (Phase 6b).
|
||||
*
|
||||
* Covers:
|
||||
* - uploadBerthPdf creates a row + bumps the berth pointer.
|
||||
* - Magic-byte rejection deletes the storage object.
|
||||
* - reconcilePdfWithBerth classifies CRM-null → autoApplied, mismatch →
|
||||
* conflicts, and respects the ±1% numeric tolerance.
|
||||
* - Mooring-number mismatch surfaces as a warning (§14.6).
|
||||
* - applyParseResults writes only allowlisted fields.
|
||||
* - rollbackToVersion flips the current pointer without re-parsing.
|
||||
*/
|
||||
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
applyParseResults,
|
||||
reconcilePdfWithBerth,
|
||||
rollbackToVersion,
|
||||
uploadBerthPdf,
|
||||
} from '@/lib/services/berth-pdf.service';
|
||||
import type { ParseResult } from '@/lib/services/berth-pdf-parser';
|
||||
import { db } from '@/lib/db';
|
||||
import { berths, berthPdfVersions } from '@/lib/db/schema/berths';
|
||||
import { systemSettings } from '@/lib/db/schema/system';
|
||||
|
||||
import { makeBerth, makePort } from '../helpers/factories';
|
||||
|
||||
// Drop the global `storage_backend` row so the factory falls back to the
|
||||
// filesystem default when these tests run in isolation. (Other suites set it.)
|
||||
beforeEach(async () => {
|
||||
await db
|
||||
.insert(systemSettings)
|
||||
.values({
|
||||
key: 'storage_backend',
|
||||
value: 'filesystem',
|
||||
portId: null,
|
||||
updatedBy: null,
|
||||
})
|
||||
.onConflictDoNothing();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
// No file cleanup needed — the filesystem backend writes to a tmp root.
|
||||
});
|
||||
|
||||
function fakePdf(): Buffer {
|
||||
// Smallest possible byte sequence the magic-byte check accepts.
|
||||
return Buffer.concat([Buffer.from('%PDF-1.7\n'), Buffer.alloc(64, 0x20)]);
|
||||
}
|
||||
|
||||
function parseResult(): ParseResult {
|
||||
return {
|
||||
engine: 'ocr',
|
||||
fields: {
|
||||
lengthFt: { value: 200, confidence: 0.9, engine: 'ocr' },
|
||||
bowFacing: { value: 'East', confidence: 0.9, engine: 'ocr' },
|
||||
},
|
||||
meanConfidence: 0.9,
|
||||
warnings: [],
|
||||
};
|
||||
}
|
||||
|
||||
describe('uploadBerthPdf', () => {
|
||||
it('writes a version and updates currentPdfVersionId', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({ portId: port.id });
|
||||
|
||||
const result = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'A1.pdf',
|
||||
uploadedBy: 'test-user',
|
||||
parseResult: parseResult(),
|
||||
});
|
||||
expect(result.versionNumber).toBe(1);
|
||||
expect(result.versionId).toMatch(/^[0-9a-f-]{36}$/);
|
||||
|
||||
const refreshed = await db.query.berths.findFirst({ where: eq(berths.id, berth.id) });
|
||||
expect(refreshed?.currentPdfVersionId).toBe(result.versionId);
|
||||
|
||||
const versionRow = await db.query.berthPdfVersions.findFirst({
|
||||
where: eq(berthPdfVersions.id, result.versionId),
|
||||
});
|
||||
expect(versionRow?.versionNumber).toBe(1);
|
||||
expect(versionRow?.fileName).toBe('A1.pdf');
|
||||
expect((versionRow?.parseResults as { engine: string }).engine).toBe('ocr');
|
||||
});
|
||||
|
||||
it('rejects a buffer that fails the magic-byte check', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({ portId: port.id });
|
||||
await expect(
|
||||
uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: Buffer.from('not a pdf at all'),
|
||||
fileName: 'spoof.pdf',
|
||||
uploadedBy: 'test-user',
|
||||
}),
|
||||
).rejects.toThrow(/magic-byte/);
|
||||
});
|
||||
|
||||
it('increments versionNumber on the second upload', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({ portId: port.id });
|
||||
await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'v1.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
const second = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'v2.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
expect(second.versionNumber).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('reconcilePdfWithBerth', () => {
|
||||
it('auto-applies fields where the CRM column is null', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({
|
||||
portId: port.id,
|
||||
overrides: { mooringNumber: 'A1', lengthFt: null, bowFacing: null },
|
||||
});
|
||||
const result = await reconcilePdfWithBerth(berth.id, {
|
||||
engine: 'ocr',
|
||||
fields: {
|
||||
lengthFt: { value: 200, confidence: 0.9, engine: 'ocr' },
|
||||
bowFacing: { value: 'East', confidence: 0.9, engine: 'ocr' },
|
||||
},
|
||||
meanConfidence: 0.9,
|
||||
warnings: [],
|
||||
});
|
||||
const fields = result.autoApplied.map((a) => a.field).sort();
|
||||
expect(fields).toEqual(['bowFacing', 'lengthFt']);
|
||||
expect(result.conflicts).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('flags conflicts when CRM and PDF disagree on a non-null value', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({
|
||||
portId: port.id,
|
||||
overrides: { mooringNumber: 'A1', lengthFt: '100', bowFacing: 'West' },
|
||||
});
|
||||
const result = await reconcilePdfWithBerth(berth.id, {
|
||||
engine: 'ocr',
|
||||
fields: {
|
||||
lengthFt: { value: 200, confidence: 0.8, engine: 'ocr' },
|
||||
bowFacing: { value: 'East', confidence: 0.8, engine: 'ocr' },
|
||||
},
|
||||
meanConfidence: 0.8,
|
||||
warnings: [],
|
||||
});
|
||||
expect(result.conflicts.map((c) => c.field).sort()).toEqual(['bowFacing', 'lengthFt']);
|
||||
});
|
||||
|
||||
it('treats a 0.5% numeric difference as equal (±1% tolerance)', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({
|
||||
portId: port.id,
|
||||
overrides: { mooringNumber: 'A1', lengthFt: '200' },
|
||||
});
|
||||
const result = await reconcilePdfWithBerth(berth.id, {
|
||||
engine: 'ocr',
|
||||
fields: {
|
||||
lengthFt: { value: 201, confidence: 0.9, engine: 'ocr' }, // +0.5%
|
||||
},
|
||||
meanConfidence: 0.9,
|
||||
warnings: [],
|
||||
});
|
||||
expect(result.conflicts).toHaveLength(0);
|
||||
expect(result.autoApplied).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('warns when the PDF mooring number does not match the berth', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({
|
||||
portId: port.id,
|
||||
overrides: { mooringNumber: 'A1' },
|
||||
});
|
||||
const result = await reconcilePdfWithBerth(berth.id, {
|
||||
engine: 'ocr',
|
||||
fields: {
|
||||
mooringNumber: { value: 'B5', confidence: 0.9, engine: 'ocr' },
|
||||
},
|
||||
meanConfidence: 0.9,
|
||||
warnings: [],
|
||||
});
|
||||
expect(result.warnings.some((w) => /B5/.test(w) && /A1/.test(w))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('applyParseResults', () => {
|
||||
it('updates only allowlisted fields and stamps appliedFields onto the version', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({
|
||||
portId: port.id,
|
||||
overrides: { mooringNumber: 'A1', lengthFt: null, bowFacing: null },
|
||||
});
|
||||
const upload = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'A1.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
|
||||
await applyParseResults(berth.id, upload.versionId, {
|
||||
lengthFt: 200,
|
||||
bowFacing: 'East',
|
||||
// unknown / non-allowlisted column should be silently dropped:
|
||||
// @ts-expect-error — testing the allowlist
|
||||
hackThePlanet: 'pwn',
|
||||
});
|
||||
|
||||
const refreshed = await db.query.berths.findFirst({ where: eq(berths.id, berth.id) });
|
||||
expect(refreshed?.lengthFt).toBe('200');
|
||||
expect(refreshed?.bowFacing).toBe('East');
|
||||
|
||||
const versionRow = await db.query.berthPdfVersions.findFirst({
|
||||
where: eq(berthPdfVersions.id, upload.versionId),
|
||||
});
|
||||
const applied = (versionRow?.parseResults as { appliedFields?: string[] }).appliedFields;
|
||||
expect(applied).toEqual(expect.arrayContaining(['lengthFt', 'bowFacing']));
|
||||
expect(applied).not.toContain('hackThePlanet');
|
||||
});
|
||||
});
|
||||
|
||||
describe('rollbackToVersion', () => {
|
||||
it('flips current_pdf_version_id to the requested version without re-parsing', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({ portId: port.id });
|
||||
const v1 = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'v1.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
const v2 = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'v2.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
|
||||
let refreshed = await db.query.berths.findFirst({ where: eq(berths.id, berth.id) });
|
||||
expect(refreshed?.currentPdfVersionId).toBe(v2.versionId);
|
||||
|
||||
const result = await rollbackToVersion(berth.id, v1.versionId);
|
||||
expect(result.versionNumber).toBe(1);
|
||||
|
||||
refreshed = await db.query.berths.findFirst({ where: eq(berths.id, berth.id) });
|
||||
expect(refreshed?.currentPdfVersionId).toBe(v1.versionId);
|
||||
});
|
||||
|
||||
it('refuses to roll back to the already-current version', async () => {
|
||||
const port = await makePort();
|
||||
const berth = await makeBerth({ portId: port.id });
|
||||
const v1 = await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
buffer: fakePdf(),
|
||||
fileName: 'v1.pdf',
|
||||
uploadedBy: 'test',
|
||||
});
|
||||
await expect(rollbackToVersion(berth.id, v1.versionId)).rejects.toThrow(/already current/);
|
||||
});
|
||||
});
|
||||
59
tests/unit/services/berth-pdf-acroform.test.ts
Normal file
59
tests/unit/services/berth-pdf-acroform.test.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* AcroForm-tier test for parseBerthPdf. Builds a synthetic PDF with named
|
||||
* AcroForm fields via pdf-lib and asserts the parser pulls them out without
|
||||
* needing OCR.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { PDFDocument } from 'pdf-lib';
|
||||
|
||||
import { parseBerthPdf } from '@/lib/services/berth-pdf-parser';
|
||||
|
||||
async function buildAcroFormPdf(): Promise<Buffer> {
|
||||
const doc = await PDFDocument.create();
|
||||
doc.addPage([400, 400]);
|
||||
const form = doc.getForm();
|
||||
|
||||
const fields: Array<[string, string]> = [
|
||||
['mooring_number', 'A1'],
|
||||
['length_ft', '206.67'],
|
||||
['length_m', '63'],
|
||||
['width_ft', '46.58'],
|
||||
['width_m', '14.2'],
|
||||
['power_capacity', '330'],
|
||||
['voltage', '480'],
|
||||
['weekly_rate_high_usd', '11341'],
|
||||
['weekly_rate_low_usd', '8100'],
|
||||
['daily_rate_high_usd', '1890'],
|
||||
['daily_rate_low_usd', '1350'],
|
||||
['pricing_valid_until', '2025-09-15'],
|
||||
['bow_facing', 'East'],
|
||||
['mooring_type', 'Side Pier / Med Mooring'],
|
||||
];
|
||||
for (const [name, value] of fields) {
|
||||
const field = form.createTextField(name);
|
||||
field.setText(value);
|
||||
}
|
||||
const bytes = await doc.save();
|
||||
return Buffer.from(bytes);
|
||||
}
|
||||
|
||||
describe('parseBerthPdf — AcroForm tier', () => {
|
||||
it('extracts named fields and skips OCR', async () => {
|
||||
const buf = await buildAcroFormPdf();
|
||||
const result = await parseBerthPdf(buf, { skipOcr: true });
|
||||
expect(result.engine).toBe('acroform');
|
||||
expect(result.fields.mooringNumber?.value).toBe('A1');
|
||||
expect(result.fields.lengthFt?.value).toBeCloseTo(206.67, 1);
|
||||
expect(result.fields.lengthM?.value).toBe(63);
|
||||
expect(result.fields.weeklyRateHighUsd?.value).toBe(11341);
|
||||
expect(result.fields.pricingValidUntil?.value).toBe('2025-09-15');
|
||||
expect(result.fields.bowFacing?.value).toBe('East');
|
||||
expect(result.meanConfidence).toBe(1);
|
||||
});
|
||||
|
||||
it('rejects a non-PDF buffer via magic-byte check', async () => {
|
||||
await expect(parseBerthPdf(Buffer.from('not a pdf'))).rejects.toThrow(/magic-byte/);
|
||||
});
|
||||
});
|
||||
193
tests/unit/services/berth-pdf-parser.test.ts
Normal file
193
tests/unit/services/berth-pdf-parser.test.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* Unit tests for the berth PDF parser (Phase 6b — see plan §4.7b, §14.6).
|
||||
*
|
||||
* Covers:
|
||||
* - Magic-byte check (`%PDF-`).
|
||||
* - OCR-tier extraction against text matching the §9.2 layout.
|
||||
* - Imperial-vs-metric tolerance warning.
|
||||
* - feet-inches parser, human-date parser.
|
||||
* - Threshold gate that decides when to offer the AI tier.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
extractFromOcrText,
|
||||
isPdfMagic,
|
||||
parseFeetInches,
|
||||
parseHumanDate,
|
||||
shouldOfferAiTier,
|
||||
} from '@/lib/services/berth-pdf-parser';
|
||||
|
||||
describe('isPdfMagic', () => {
|
||||
it('accepts a buffer that starts with %PDF-', () => {
|
||||
expect(isPdfMagic(Buffer.from('%PDF-1.7\n'))).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects a buffer that does not', () => {
|
||||
expect(isPdfMagic(Buffer.from('PK\x03\x04'))).toBe(false);
|
||||
expect(isPdfMagic(Buffer.from('hello'))).toBe(false);
|
||||
expect(isPdfMagic(Buffer.from('%PDX-'))).toBe(false);
|
||||
});
|
||||
|
||||
it('rejects a buffer shorter than 5 bytes', () => {
|
||||
expect(isPdfMagic(Buffer.from('%PDF'))).toBe(false);
|
||||
expect(isPdfMagic(Buffer.alloc(0))).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseFeetInches', () => {
|
||||
it('parses ft-in', () => {
|
||||
expect(parseFeetInches(`206' 8"`)).toBeCloseTo(206 + 8 / 12, 5);
|
||||
});
|
||||
it('parses ft-only', () => {
|
||||
expect(parseFeetInches('82')).toBe(82);
|
||||
expect(parseFeetInches('82.5')).toBe(82.5);
|
||||
});
|
||||
it('returns null for garbage', () => {
|
||||
expect(parseFeetInches('hello')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseHumanDate', () => {
|
||||
it('parses ordinal-suffixed dates', () => {
|
||||
expect(parseHumanDate('September 15th 2025')).toBe('2025-09-15');
|
||||
});
|
||||
it('returns null for unparsable', () => {
|
||||
expect(parseHumanDate('not a date')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractFromOcrText — sample berth A1', () => {
  // Mirrors the layout of Berth_Spec_Sheet_A1.pdf documented in plan §9.2.
  // The exact spacing / punctuation matters: the extractor's regexes are keyed
  // off this shape, so edit this fixture only alongside the parser.
  const sample = `
PORT NIMARA
ANGUILLA

BERTH NUMBER

A1 200'

Length: 206' 8" / 63m
Width: 46' 7" / 14.2m
Water Depth: 16' 1" / 4.9m

Bow Facing: East
Pontoon: QUAY PT
Power Capacity: 330 kW
Voltage at 60 Hz: 480 V
Max. draught of vessel: 14' 6" / 4.4m

PURCHASE PRICE:
FEE SIMPLE OR STRATA LOT
3,880,800 USD

WEEK HIGH / LOW: 11,341 USD / 8,100 USD
DAY HIGH / LOW: 1,890 USD / 1,350 USD

ALL PRICES ABOVE ARE CONFIRMED THROUGH UNTIL SEPTEMBER 15TH, 2025

Mooring Type: Side Pier / Med Mooring
Cleat Type: A5
Cleat Capacity: 20-24 ton break load
Bollard Type: Bull bollard type B
Bollard Capacity: 40 ton break load
Access: Car to Vessel (max. 3 ton)
`;

  // Extracted once at describe scope — the call is pure, so every assertion
  // below can share the same result.
  const { fields, warnings } = extractFromOcrText(sample);

  it('extracts the mooring number', () => {
    expect(fields.mooringNumber?.value).toBe('A1');
  });

  it('extracts dimensional pairs', () => {
    // Imperial feet-and-inches parse to decimal feet; metric values are taken
    // verbatim from the `<imperial> / <metric>` pairs.
    expect(fields.lengthFt?.value).toBeCloseTo(206 + 8 / 12, 1);
    expect(fields.lengthM?.value).toBe(63);
    expect(fields.widthFt?.value).toBeCloseTo(46 + 7 / 12, 1);
    expect(fields.widthM?.value).toBe(14.2);
    expect(fields.waterDepth?.value).toBeCloseTo(16 + 1 / 12, 1);
    expect(fields.waterDepthM?.value).toBe(4.9);
    expect(fields.draftFt?.value).toBeCloseTo(14 + 6 / 12, 1);
    expect(fields.draftM?.value).toBe(4.4);
  });

  it('extracts power + voltage', () => {
    expect(fields.powerCapacity?.value).toBe(330);
    expect(fields.voltage?.value).toBe(480);
  });

  it('extracts pricing block', () => {
    // Thousands separators in the fixture must be stripped by the extractor.
    expect(fields.weeklyRateHighUsd?.value).toBe(11341);
    expect(fields.weeklyRateLowUsd?.value).toBe(8100);
    expect(fields.dailyRateHighUsd?.value).toBe(1890);
    expect(fields.dailyRateLowUsd?.value).toBe(1350);
    expect(fields.price?.value).toBe(3880800);
  });

  it('extracts pricing-validity date', () => {
    // "SEPTEMBER 15TH, 2025" in the fixture → ISO date.
    expect(fields.pricingValidUntil?.value).toBe('2025-09-15');
  });

  it('extracts access + mooring + cleat + bollard text fields', () => {
    expect(fields.bowFacing?.value).toBe('East');
    expect(fields.sidePontoon?.value).toBe('QUAY PT');
    expect(fields.mooringType?.value).toContain('Side Pier');
    expect(fields.cleatType?.value).toBe('A5');
    expect(fields.cleatCapacity?.value).toContain('20-24 ton');
    expect(fields.bollardType?.value).toContain('Bull bollard');
    expect(fields.access?.value).toContain('Car to Vessel');
  });

  it('does not warn when imperial/metric agree within 1%', () => {
    expect(warnings).not.toEqual(expect.arrayContaining([expect.stringContaining('mismatch')]));
  });
});
|
||||
|
||||
describe('extractFromOcrText — imperial/metric drift warning', () => {
|
||||
it('flags a >1% mismatch', () => {
|
||||
const { warnings } = extractFromOcrText('Length: 100 ft / 50m');
|
||||
expect(warnings.some((w) => /mismatch/i.test(w))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('shouldOfferAiTier', () => {
|
||||
it('returns false for AcroForm parses', () => {
|
||||
expect(
|
||||
shouldOfferAiTier({
|
||||
engine: 'acroform',
|
||||
fields: { mooringNumber: { value: 'A1', confidence: 1, engine: 'acroform' } },
|
||||
meanConfidence: 1,
|
||||
warnings: [],
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('returns true when OCR found nothing', () => {
|
||||
expect(shouldOfferAiTier({ engine: 'ocr', fields: {}, meanConfidence: 0, warnings: [] })).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('returns true when mean confidence dips below threshold', () => {
|
||||
expect(
|
||||
shouldOfferAiTier({
|
||||
engine: 'ocr',
|
||||
fields: { mooringNumber: { value: 'A1', confidence: 0.3, engine: 'ocr' } },
|
||||
meanConfidence: 0.3,
|
||||
warnings: [],
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false when OCR is confident', () => {
|
||||
expect(
|
||||
shouldOfferAiTier({
|
||||
engine: 'ocr',
|
||||
fields: { mooringNumber: { value: 'A1', confidence: 0.9, engine: 'ocr' } },
|
||||
meanConfidence: 0.9,
|
||||
warnings: [],
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -49,6 +49,7 @@ function makeBerth(overrides: Partial<Berth> = {}): Berth {
|
||||
statusLastModified: null,
|
||||
statusOverrideMode: null,
|
||||
lastImportedAt: null,
|
||||
currentPdfVersionId: null,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
...overrides,
|
||||
|
||||
Reference in New Issue
Block a user