fix(berths): CM-2 — robust purchase-price extraction (clean-token + magnitude floor)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-19 10:30:12 +02:00
parent df8c26d1b3
commit f7425d1231
2 changed files with 106 additions and 4 deletions

View File

@@ -13,9 +13,11 @@ import { describe, expect, it } from 'vitest';
import {
extractFromOcrText,
extractPurchasePrice,
isPdfMagic,
parseFeetInches,
parseHumanDate,
PURCHASE_PRICE_FLOOR,
shouldOfferAiTier,
} from '@/lib/services/berth-pdf-parser';
@@ -191,3 +193,43 @@ describe('shouldOfferAiTier', () => {
).toBe(false);
});
});
/**
 * Behavioural spec for `extractPurchasePrice`.
 *
 * The cases below pin what the extractor is expected to return for OCR text:
 * a single above-floor price is reported, tokens below the floor are ignored,
 * and distinct competing prices yield a null value plus a warning.
 */
describe('extractPurchasePrice', () => {
  it('isolates the single clean main price among letter-spaced rate garble', () => {
    // Mirrors real sheets: the week/day rates come out letter-spaced, so only
    // the main price is rendered as a clean, contiguous token.
    const garbledSheet =
      'W E E K H I G H / LO W : 1 1 , 3 4 1 U S D / 8 , 1 0 0 U S D 3,880,800 USD ' +
      'DAY H I G H / LO W : 1 , 8 9 0 U S D / 1 , 3 5 0 U S D';

    const { value, currency, confidence } = extractPurchasePrice(garbledSheet);

    expect(value).toBe(3880800);
    expect(currency).toBe('USD');
    expect(confidence).toBeGreaterThanOrEqual(0.9);
  });

  it('excludes clean rate tokens below the floor (synthetic clean sheet)', () => {
    // Here the rates are clean tokens too; they are expected to be dropped
    // because they sit below the floor, leaving only the main price.
    const cleanSheet = '3,880,800 USD WEEK HIGH / LOW: 11,341 USD / 8,100 USD';

    const { value } = extractPurchasePrice(cleanSheet);

    expect(value).toBe(3880800);
  });

  it('returns null + warning when no price-magnitude token is present', () => {
    const { value, warning } = extractPurchasePrice('no prices here, just 12 USD of nothing');

    expect(value).toBeNull();
    expect(warning).toMatch(/no purchase-price/i);
  });

  it('flags ambiguity when two DISTINCT above-floor tokens appear', () => {
    // Two different candidates: the extractor must refuse to pick one.
    const { value, warning } = extractPurchasePrice('3,880,800 USD and also 1,247,400 USD');

    expect(value).toBeNull();
    expect(warning).toMatch(/multiple/i);
  });

  it('treats a repeated identical price as unambiguous', () => {
    // The same amount appearing twice still counts as one candidate.
    const { value } = extractPurchasePrice('720,720 USD ... header ... 720,720 USD');

    expect(value).toBe(720720);
  });

  it('exposes the floor constant', () => {
    expect(PURCHASE_PRICE_FLOOR).toBe(50_000);
  });
});