194 lines
5.6 KiB
TypeScript
194 lines
5.6 KiB
TypeScript
|
|
/**
 * Unit tests for the berth PDF parser (Phase 6b — see plan §4.7b, §14.6).
 *
 * Covers:
 *   - Magic-byte check (`%PDF-`).
 *   - OCR-tier extraction against text matching the §9.2 layout.
 *   - Imperial-vs-metric tolerance warning.
 *   - Feet-inches parser and human-date parser.
 *   - Threshold gate that decides when to offer the AI tier.
 */
|
||
|
|
|
||
|
|
import { describe, expect, it } from 'vitest';
|
||
|
|
|
||
|
|
import {
|
||
|
|
extractFromOcrText,
|
||
|
|
isPdfMagic,
|
||
|
|
parseFeetInches,
|
||
|
|
parseHumanDate,
|
||
|
|
shouldOfferAiTier,
|
||
|
|
} from '@/lib/services/berth-pdf-parser';
|
||
|
|
|
||
|
|
// Magic-byte check: only buffers beginning with the five bytes `%PDF-` pass.
describe('isPdfMagic', () => {
  it('accepts a buffer that starts with %PDF-', () => {
    const pdfHeader = Buffer.from('%PDF-1.7\n');

    expect(isPdfMagic(pdfHeader)).toBe(true);
  });

  it('rejects a buffer that does not', () => {
    // ZIP signature, plain text, and a near-miss that differs in one byte.
    const nonPdfHeaders = ['PK\x03\x04', 'hello', '%PDX-'];

    for (const header of nonPdfHeaders) {
      expect(isPdfMagic(Buffer.from(header))).toBe(false);
    }
  });

  it('rejects a buffer shorter than 5 bytes', () => {
    const truncatedHeader = Buffer.from('%PDF');
    const emptyBuffer = Buffer.alloc(0);

    expect(isPdfMagic(truncatedHeader)).toBe(false);
    expect(isPdfMagic(emptyBuffer)).toBe(false);
  });
});
|
||
|
|
|
||
|
|
// Feet-inches parser: results are expressed in (possibly fractional) feet.
describe('parseFeetInches', () => {
  it('parses ft-in', () => {
    const expectedFeet = 206 + 8 / 12;
    const parsed = parseFeetInches(`206' 8"`);

    expect(parsed).toBeCloseTo(expectedFeet, 5);
  });

  it('parses ft-only', () => {
    // Bare numbers (integer or decimal) come back unchanged.
    const cases = [
      { text: '82', feet: 82 },
      { text: '82.5', feet: 82.5 },
    ];

    for (const { text, feet } of cases) {
      expect(parseFeetInches(text)).toBe(feet);
    }
  });

  it('returns null for garbage', () => {
    const parsed = parseFeetInches('hello');

    expect(parsed).toBeNull();
  });
});
|
||
|
|
|
||
|
|
// Human-date parser: a parsed date is returned as a `YYYY-MM-DD` string.
describe('parseHumanDate', () => {
  it('parses ordinal-suffixed dates', () => {
    const isoDate = parseHumanDate('September 15th 2025');

    expect(isoDate).toBe('2025-09-15');
  });

  it('returns null for unparsable', () => {
    const isoDate = parseHumanDate('not a date');

    expect(isoDate).toBeNull();
  });
});
|
||
|
|
|
||
|
|
// OCR-tier extraction against a full spec-sheet fixture. The text is parsed
// once when the suite is collected, and every test below asserts on that
// shared `fields` / `warnings` result.
describe('extractFromOcrText — sample berth A1', () => {
  // Mirrors the layout of Berth_Spec_Sheet_A1.pdf documented in plan §9.2.
  const sample = `
PORT NIMARA
ANGUILLA

BERTH NUMBER

A1 200'

Length: 206' 8" / 63m
Width: 46' 7" / 14.2m
Water Depth: 16' 1" / 4.9m

Bow Facing: East
Pontoon: QUAY PT
Power Capacity: 330 kW
Voltage at 60 Hz: 480 V
Max. draught of vessel: 14' 6" / 4.4m

PURCHASE PRICE:
FEE SIMPLE OR STRATA LOT
3,880,800 USD

WEEK HIGH / LOW: 11,341 USD / 8,100 USD
DAY HIGH / LOW: 1,890 USD / 1,350 USD

ALL PRICES ABOVE ARE CONFIRMED THROUGH UNTIL SEPTEMBER 15TH, 2025

Mooring Type: Side Pier / Med Mooring
Cleat Type: A5
Cleat Capacity: 20-24 ton break load
Bollard Type: Bull bollard type B
Bollard Capacity: 40 ton break load
Access: Car to Vessel (max. 3 ton)
`;

  const { fields, warnings } = extractFromOcrText(sample);

  it('extracts the mooring number', () => {
    expect(fields.mooringNumber?.value).toBe('A1');
  });

  it('extracts dimensional pairs', () => {
    // Imperial values are compared in decimal feet to one decimal place;
    // metric values must match the sheet exactly.
    expect(fields.lengthFt?.value).toBeCloseTo(206 + 8 / 12, 1);
    expect(fields.lengthM?.value).toBe(63);
    expect(fields.widthFt?.value).toBeCloseTo(46 + 7 / 12, 1);
    expect(fields.widthM?.value).toBe(14.2);
    expect(fields.waterDepth?.value).toBeCloseTo(16 + 1 / 12, 1);
    expect(fields.waterDepthM?.value).toBe(4.9);
    expect(fields.draftFt?.value).toBeCloseTo(14 + 6 / 12, 1);
    expect(fields.draftM?.value).toBe(4.4);
  });

  it('extracts power + voltage', () => {
    expect(fields.powerCapacity?.value).toBe(330);
    expect(fields.voltage?.value).toBe(480);
  });

  it('extracts pricing block', () => {
    // Thousands separators in the sheet must be stripped to plain numbers.
    expect(fields.weeklyRateHighUsd?.value).toBe(11341);
    expect(fields.weeklyRateLowUsd?.value).toBe(8100);
    expect(fields.dailyRateHighUsd?.value).toBe(1890);
    expect(fields.dailyRateLowUsd?.value).toBe(1350);
    expect(fields.price?.value).toBe(3880800);
  });

  it('extracts pricing-validity date', () => {
    expect(fields.pricingValidUntil?.value).toBe('2025-09-15');
  });

  it('extracts access + mooring + cleat + bollard text fields', () => {
    expect(fields.bowFacing?.value).toBe('East');
    expect(fields.sidePontoon?.value).toBe('QUAY PT');
    expect(fields.mooringType?.value).toContain('Side Pier');
    expect(fields.cleatType?.value).toBe('A5');
    expect(fields.cleatCapacity?.value).toContain('20-24 ton');
    expect(fields.bollardType?.value).toContain('Bull bollard');
    expect(fields.access?.value).toContain('Car to Vessel');
  });

  it('does not warn when imperial/metric agree within 1%', () => {
    // Case-insensitive on purpose: the drift-warning suite detects the warning
    // with /mismatch/i, so the negative check must use the same predicate or a
    // capitalised "Mismatch" warning would slip past it unnoticed.
    expect(warnings.some((w) => /mismatch/i.test(w))).toBe(false);
  });
});
|
||
|
|
|
||
|
|
// Imperial-vs-metric tolerance: 100 ft and 50 m are far apart, so the parser
// is expected to emit a warning mentioning a mismatch.
describe('extractFromOcrText — imperial/metric drift warning', () => {
  it('flags a >1% mismatch', () => {
    const result = extractFromOcrText('Length: 100 ft / 50m');
    const mentionsMismatch = result.warnings.some((w) => /mismatch/i.test(w));

    expect(mentionsMismatch).toBe(true);
  });
});
|
||
|
|
|
||
|
|
// Gate deciding whether to offer the AI tier. Exercised with four parse
// results: an AcroForm parse, an empty OCR parse, and OCR parses with a mean
// confidence of 0.3 and 0.9.
describe('shouldOfferAiTier', () => {
  it('returns false for AcroForm parses', () => {
    const offered = shouldOfferAiTier({
      engine: 'acroform',
      fields: { mooringNumber: { value: 'A1', confidence: 1, engine: 'acroform' } },
      meanConfidence: 1,
      warnings: [],
    });

    expect(offered).toBe(false);
  });

  it('returns true when OCR found nothing', () => {
    const offered = shouldOfferAiTier({
      engine: 'ocr',
      fields: {},
      meanConfidence: 0,
      warnings: [],
    });

    expect(offered).toBe(true);
  });

  it('returns true when mean confidence dips below threshold', () => {
    const offered = shouldOfferAiTier({
      engine: 'ocr',
      fields: { mooringNumber: { value: 'A1', confidence: 0.3, engine: 'ocr' } },
      meanConfidence: 0.3,
      warnings: [],
    });

    expect(offered).toBe(true);
  });

  it('returns false when OCR is confident', () => {
    const offered = shouldOfferAiTier({
      engine: 'ocr',
      fields: { mooringNumber: { value: 'A1', confidence: 0.9, engine: 'ocr' } },
      meanConfidence: 0.9,
      warnings: [],
    });

    expect(offered).toBe(false);
  });
});
|