fix(berths): CM-2 — robust purchase-price extraction (clean-token + magnitude floor)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,9 +13,11 @@ import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
extractFromOcrText,
|
||||
extractPurchasePrice,
|
||||
isPdfMagic,
|
||||
parseFeetInches,
|
||||
parseHumanDate,
|
||||
PURCHASE_PRICE_FLOOR,
|
||||
shouldOfferAiTier,
|
||||
} from '@/lib/services/berth-pdf-parser';
|
||||
|
||||
@@ -191,3 +193,43 @@ describe('shouldOfferAiTier', () => {
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Test suite for `extractPurchasePrice`.
 *
 * Each case feeds a text sample to the extractor and checks the returned
 * result object (`value`, `currency`, `confidence`, `warning`). The last case
 * pins the exported `PURCHASE_PRICE_FLOOR` constant.
 */
describe('extractPurchasePrice', () => {
  it('isolates the single clean main price among letter-spaced rate garble', () => {
    // Sample modelled on a real sheet: the weekly/daily rates arrive with a
    // space between every character, while the main price is a clean token.
    const sheetText = [
      'W E E K H I G H / LO W : 1 1 , 3 4 1 U S D / 8 , 1 0 0 U S D 3,880,800 USD ',
      'DAY H I G H / LO W : 1 , 8 9 0 U S D / 1 , 3 5 0 U S D',
    ].join('');

    const { value, currency, confidence } = extractPurchasePrice(sheetText);

    expect(value).toBe(3880800);
    expect(currency).toBe('USD');
    expect(confidence).toBeGreaterThanOrEqual(0.9);
  });

  it('excludes clean rate tokens below the floor (synthetic clean sheet)', () => {
    // Here the rates are clean tokens too; only the large amount is expected back.
    const sheetText = '3,880,800 USD WEEK HIGH / LOW: 11,341 USD / 8,100 USD';

    const { value } = extractPurchasePrice(sheetText);

    expect(value).toBe(3880800);
  });

  it('returns null + warning when no price-magnitude token is present', () => {
    const { value, warning } = extractPurchasePrice('no prices here, just 12 USD of nothing');

    expect(value).toBeNull();
    expect(warning).toMatch(/no purchase-price/i);
  });

  it('flags ambiguity when two DISTINCT above-floor tokens appear', () => {
    const { value, warning } = extractPurchasePrice('3,880,800 USD and also 1,247,400 USD');

    expect(value).toBeNull();
    expect(warning).toMatch(/multiple/i);
  });

  it('treats a repeated identical price as unambiguous', () => {
    // The same amount appearing twice must still resolve to that amount.
    const { value } = extractPurchasePrice('720,720 USD ... header ... 720,720 USD');

    expect(value).toBe(720720);
  });

  it('exposes the floor constant', () => {
    expect(PURCHASE_PRICE_FLOOR).toBe(50_000);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user