import { describe, it, expect, vi } from 'vitest';

// Mock pdfjs-dist before importing the service. The detector calls
// `import('pdfjs-dist/legacy/build/pdf.mjs')` dynamically; we stub the
// module with a fake document whose pages return canned text items so
// we can assert the anchor-matching + coordinate-conversion logic
// without needing a real PDF.
vi.mock('pdfjs-dist/legacy/build/pdf.mjs', () => ({
  getDocument: (_opts: unknown) => ({
    promise: Promise.resolve({
      numPages: 1,
      getPage: async (_n: number) => ({
        getViewport: ({ scale: _s }: { scale: number }) => ({
          width: 595, // A4 in pt
          height: 842,
        }),
        getTextContent: async () => ({
          items: [
            // Item 0: a signature anchor near the bottom-left
            {
              str: 'Signature: ',
              transform: [1, 0, 0, 1, 50, 100],
              width: 70,
            },
            // Item 1: a date anchor next to it
            {
              str: 'Date: ',
              transform: [1, 0, 0, 1, 250, 100],
              width: 40,
            },
            // Item 2: recipient label nearby
            {
              str: 'Buyer',
              transform: [1, 0, 0, 1, 50, 130],
              width: 40,
            },
            // Item 3: unrelated body text (should not match)
            {
              str: 'The parties hereby agree…',
              transform: [1, 0, 0, 1, 50, 200],
              width: 200,
            },
          ],
        }),
      }),
    }),
  }),
}));

import { detectFields } from '@/lib/services/document-field-detector';

// Every test feeds the detector a placeholder buffer: the stubbed pdfjs
// module above ignores its input and always serves the same canned page.
describe('detectFields', () => {
  it('returns matches for known anchors with the right type + page', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    expect(result.length).toBeGreaterThanOrEqual(2);

    const sig = result.find((r) => r.type === 'SIGNATURE');
    const date = result.find((r) => r.type === 'DATE');
    expect(sig).toBeDefined();
    expect(date).toBeDefined();
    expect(sig?.pageNumber).toBe(1);
    expect(date?.pageNumber).toBe(1);
  });

  it('infers recipient label from nearby text', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.inferredRecipientLabel).toBe('Buyer');
  });

  it('returns percent coordinates in [0, 100]', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    for (const f of result) {
      expect(f.pageX).toBeGreaterThanOrEqual(0);
      expect(f.pageX).toBeLessThanOrEqual(100);
      expect(f.pageY).toBeGreaterThanOrEqual(0);
      expect(f.pageY).toBeLessThanOrEqual(100);
      expect(f.pageWidth).toBeGreaterThan(0);
      expect(f.pageHeight).toBeGreaterThan(0);
    }
  });

  it('attaches the anchor text + a confidence score', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.anchorText).toMatch(/signature/i);
    expect(sig?.confidence).toBeGreaterThan(0.5);
    expect(sig?.confidence).toBeLessThanOrEqual(1);
  });

  it('does not match body text that lacks a signing-block keyword', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    // The "The parties hereby agree" item should not produce a TEXT
    // detection (no underscore run, no keyword).
    expect(result.find((r) => r.anchorText?.includes('parties'))).toBeUndefined();
  });

  it('gracefully returns [] when pdfjs throws', async () => {
    // Force pdfjs to reject for this one call
    const mod = await import('pdfjs-dist/legacy/build/pdf.mjs');
    const orig = mod.getDocument;
    // Writable view of the module so the stubbed export can be swapped.
    const patchable = mod as unknown as { getDocument: typeof orig };

    patchable.getDocument = () =>
      ({ promise: Promise.reject(new Error('boom')) }) as ReturnType<typeof orig>;

    try {
      const result = await detectFields(Buffer.from('not-a-pdf'));
      expect(result).toEqual([]);
    } finally {
      // Restore even when the assertion fails so the rejecting stub
      // cannot leak into any test that runs afterwards.
      patchable.getDocument = orig;
    }
  });
});