/**
 * Unit tests for the berth PDF parser (Phase 6b — see plan §4.7b, §14.6).
 *
 * Covers:
 *   - Magic-byte check (`%PDF-`).
 *   - OCR-tier extraction against text matching the §9.2 layout.
 *   - Imperial-vs-metric tolerance warning.
 *   - feet-inches parser, human-date parser.
 *   - Threshold gate that decides when to offer the AI tier.
 */
import { describe, expect, it } from 'vitest';

import {
  extractFromOcrText,
  isPdfMagic,
  parseFeetInches,
  parseHumanDate,
  shouldOfferAiTier,
} from '@/lib/services/berth-pdf-parser';

/**
 * True when any warning mentions a mismatch, ignoring case.
 * Shared by the positive and negative drift tests so both use the same
 * matching rule and neither can pass just because of capitalisation.
 */
const hasMismatchWarning = (warnings) => warnings.some((w) => /mismatch/i.test(w));

// The `%PDF-` signature check, including near-misses and truncated input.
describe('isPdfMagic', () => {
  it('accepts a buffer that starts with %PDF-', () => {
    expect(isPdfMagic(Buffer.from('%PDF-1.7\n'))).toBe(true);
  });

  it('rejects a buffer that does not', () => {
    // ZIP signature, plain text, and a one-character near-miss.
    expect(isPdfMagic(Buffer.from('PK\x03\x04'))).toBe(false);
    expect(isPdfMagic(Buffer.from('hello'))).toBe(false);
    expect(isPdfMagic(Buffer.from('%PDX-'))).toBe(false);
  });

  it('rejects a buffer shorter than 5 bytes', () => {
    expect(isPdfMagic(Buffer.from('%PDF'))).toBe(false);
    expect(isPdfMagic(Buffer.alloc(0))).toBe(false);
  });
});

// Feet/inches strings are expected to come back as decimal feet.
describe('parseFeetInches', () => {
  it('parses ft-in', () => {
    expect(parseFeetInches(`206' 8"`)).toBeCloseTo(206 + 8 / 12, 5);
  });

  it('parses ft-only', () => {
    expect(parseFeetInches('82')).toBe(82);
    expect(parseFeetInches('82.5')).toBe(82.5);
  });

  it('returns null for garbage', () => {
    expect(parseFeetInches('hello')).toBeNull();
  });
});

// Human-readable dates are expected to come back as ISO `YYYY-MM-DD` strings.
describe('parseHumanDate', () => {
  it('parses ordinal-suffixed dates', () => {
    expect(parseHumanDate('September 15th 2025')).toBe('2025-09-15');
  });

  it('returns null for unparsable', () => {
    expect(parseHumanDate('not a date')).toBeNull();
  });
});

describe('extractFromOcrText — sample berth A1', () => {
  // Mirrors the layout of Berth_Spec_Sheet_A1.pdf documented in plan §9.2.
  // One labelled field per line, so the exact-match assertions below
  // (e.g. 'QUAY PT', 'A5', 'East') have an unambiguous end-of-value boundary.
  const sample = `
PORT NIMARA
ANGUILLA
BERTH NUMBER A1
200'
Length: 206' 8" / 63m
Width: 46' 7" / 14.2m
Water Depth: 16' 1" / 4.9m
Bow Facing: East
Pontoon: QUAY PT
Power Capacity: 330 kW
Voltage at 60 Hz: 480 V
Max. draught of vessel: 14' 6" / 4.4m
PURCHASE PRICE: FEE SIMPLE OR STRATA LOT 3,880,800 USD
WEEK HIGH / LOW: 11,341 USD / 8,100 USD
DAY HIGH / LOW: 1,890 USD / 1,350 USD
ALL PRICES ABOVE ARE CONFIRMED THROUGH UNTIL SEPTEMBER 15TH, 2025
Mooring Type: Side Pier / Med Mooring
Cleat Type: A5
Cleat Capacity: 20-24 ton break load
Bollard Type: Bull bollard type B
Bollard Capacity: 40 ton break load
Access: Car to Vessel (max. 3 ton)
`;

  // Parsed once; every test in this suite inspects the same result.
  const { fields, warnings } = extractFromOcrText(sample);

  it('extracts the mooring number', () => {
    expect(fields.mooringNumber?.value).toBe('A1');
  });

  it('extracts dimensional pairs', () => {
    // Imperial values are compared to one decimal place; metric values exactly.
    expect(fields.lengthFt?.value).toBeCloseTo(206 + 8 / 12, 1);
    expect(fields.lengthM?.value).toBe(63);
    expect(fields.widthFt?.value).toBeCloseTo(46 + 7 / 12, 1);
    expect(fields.widthM?.value).toBe(14.2);
    expect(fields.waterDepth?.value).toBeCloseTo(16 + 1 / 12, 1);
    expect(fields.waterDepthM?.value).toBe(4.9);
    expect(fields.draftFt?.value).toBeCloseTo(14 + 6 / 12, 1);
    expect(fields.draftM?.value).toBe(4.4);
  });

  it('extracts power + voltage', () => {
    expect(fields.powerCapacity?.value).toBe(330);
    expect(fields.voltage?.value).toBe(480);
  });

  it('extracts pricing block', () => {
    expect(fields.weeklyRateHighUsd?.value).toBe(11341);
    expect(fields.weeklyRateLowUsd?.value).toBe(8100);
    expect(fields.dailyRateHighUsd?.value).toBe(1890);
    expect(fields.dailyRateLowUsd?.value).toBe(1350);
    expect(fields.price?.value).toBe(3880800);
  });

  it('extracts pricing-validity date', () => {
    expect(fields.pricingValidUntil?.value).toBe('2025-09-15');
  });

  it('extracts access + mooring + cleat + bollard text fields', () => {
    expect(fields.bowFacing?.value).toBe('East');
    expect(fields.sidePontoon?.value).toBe('QUAY PT');
    expect(fields.mooringType?.value).toContain('Side Pier');
    expect(fields.cleatType?.value).toBe('A5');
    expect(fields.cleatCapacity?.value).toContain('20-24 ton');
    expect(fields.bollardType?.value).toContain('Bull bollard');
    expect(fields.access?.value).toContain('Car to Vessel');
  });

  it('does not warn when imperial/metric agree within 1%', () => {
    // Case-insensitive, matching the positive drift test below; a
    // case-sensitive check would pass vacuously on a capitalised warning.
    expect(hasMismatchWarning(warnings)).toBe(false);
  });
});

describe('extractFromOcrText — imperial/metric drift warning', () => {
  it('flags a >1% mismatch', () => {
    // 100 ft is roughly 30.5 m, so "50m" is far outside the tolerance.
    const { warnings } = extractFromOcrText('Length: 100 ft / 50m');
    expect(hasMismatchWarning(warnings)).toBe(true);
  });
});

// Gate deciding whether the AI extraction tier should be offered.
describe('shouldOfferAiTier', () => {
  it('returns false for AcroForm parses', () => {
    expect(
      shouldOfferAiTier({
        engine: 'acroform',
        fields: { mooringNumber: { value: 'A1', confidence: 1, engine: 'acroform' } },
        meanConfidence: 1,
        warnings: [],
      }),
    ).toBe(false);
  });

  it('returns true when OCR found nothing', () => {
    expect(
      shouldOfferAiTier({ engine: 'ocr', fields: {}, meanConfidence: 0, warnings: [] }),
    ).toBe(true);
  });

  it('returns true when mean confidence dips below threshold', () => {
    expect(
      shouldOfferAiTier({
        engine: 'ocr',
        fields: { mooringNumber: { value: 'A1', confidence: 0.3, engine: 'ocr' } },
        meanConfidence: 0.3,
        warnings: [],
      }),
    ).toBe(true);
  });

  it('returns false when OCR is confident', () => {
    expect(
      shouldOfferAiTier({
        engine: 'ocr',
        fields: { mooringNumber: { value: 'A1', confidence: 0.9, engine: 'ocr' } },
        meanConfidence: 0.9,
        warnings: [],
      }),
    ).toBe(false);
  });
});