108 lines
4.0 KiB
TypeScript
108 lines
4.0 KiB
TypeScript
|
|
import { describe, it, expect, vi } from 'vitest';
|
||
|
|
|
||
|
|
// Mock pdfjs-dist before importing the service. The detector calls
// `import('pdfjs-dist/legacy/build/pdf.mjs')` dynamically; we stub the
// module with a fake document whose pages return canned text items so
// we can assert the anchor-matching + coordinate-conversion logic
// without needing a real PDF.
|
||
|
|
vi.mock('pdfjs-dist/legacy/build/pdf.mjs', () => {
  // Everything is declared inside the factory: `vi.mock` calls are hoisted,
  // so the factory must not depend on bindings from the module scope.

  /** Builds one canned text item positioned at (x, y) with an identity matrix. */
  const textItem = (str: string, x: number, y: number, width: number) => ({
    str,
    transform: [1, 0, 0, 1, x, y],
    width,
  });

  /** Returns a fresh copy of the text content exposed by the fake page. */
  const buildTextContent = () => ({
    items: [
      // Item 0: a signature anchor near the bottom-left
      textItem('Signature: ', 50, 100, 70),
      // Item 1: a date anchor next to it
      textItem('Date: ', 250, 100, 40),
      // Item 2: recipient label nearby
      textItem('Buyer', 50, 130, 40),
      // Item 3: unrelated body text (should not match)
      textItem('The parties hereby agree…', 50, 200, 200),
    ],
  });

  /** Returns a fresh fake page; the viewport ignores the requested scale. */
  const buildPage = () => ({
    getViewport: ({ scale: _scale }: { scale: number }) => ({
      width: 595, // A4 in pt
      height: 842,
    }),
    getTextContent: async () => buildTextContent(),
  });

  /** Returns a fresh single-page fake document; every page number yields the same page. */
  const buildDocument = () => ({
    numPages: 1,
    getPage: async (_pageNumber: number) => buildPage(),
  });

  return {
    // The stub exposes the resolved document through a `promise` property.
    getDocument: (_source: unknown) => ({
      promise: Promise.resolve(buildDocument()),
    }),
  };
});
|
||
|
|
|
||
|
|
import { detectFields } from '@/lib/services/document-field-detector';
|
||
|
|
|
||
|
|
/**
 * Behavioural tests for `detectFields`, run against the stubbed
 * `pdfjs-dist/legacy/build/pdf.mjs` module registered with `vi.mock` in this
 * file. The buffers passed in are placeholders: the stub ignores its input and
 * always serves the same canned text items.
 */
describe('detectFields', () => {
  it('returns matches for known anchors with the right type + page', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    expect(result.length).toBeGreaterThanOrEqual(2);
    const sig = result.find((r) => r.type === 'SIGNATURE');
    const date = result.find((r) => r.type === 'DATE');
    expect(sig).toBeDefined();
    expect(date).toBeDefined();
    expect(sig?.pageNumber).toBe(1);
    expect(date?.pageNumber).toBe(1);
  });

  it('infers recipient label from nearby text', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.inferredRecipientLabel).toBe('Buyer');
  });

  it('returns percent coordinates in [0, 100]', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    for (const f of result) {
      expect(f.pageX).toBeGreaterThanOrEqual(0);
      expect(f.pageX).toBeLessThanOrEqual(100);
      expect(f.pageY).toBeGreaterThanOrEqual(0);
      expect(f.pageY).toBeLessThanOrEqual(100);
      expect(f.pageWidth).toBeGreaterThan(0);
      expect(f.pageHeight).toBeGreaterThan(0);
    }
  });

  it('attaches the anchor text + a confidence score', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.anchorText).toMatch(/signature/i);
    expect(sig?.confidence).toBeGreaterThan(0.5);
    expect(sig?.confidence).toBeLessThanOrEqual(1);
  });

  it('does not match body text that lacks a signing-block keyword', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    // The "The parties hereby agree" item should not produce a TEXT
    // detection (no underscore run, no keyword).
    expect(result.find((r) => r.anchorText?.includes('parties'))).toBeUndefined();
  });

  it('gracefully returns [] when pdfjs throws', async () => {
    // Force pdfjs to reject for this one call
    const mod = await import('pdfjs-dist/legacy/build/pdf.mjs');
    const orig = mod.getDocument;
    // NOTE(review): this relies on the mocked module object accepting
    // property assignment — confirm against the Vitest version in use.
    const mutableMod = mod as unknown as { getDocument: typeof orig };
    mutableMod.getDocument = () =>
      ({ promise: Promise.reject(new Error('boom')) }) as ReturnType<typeof orig>;
    try {
      const result = await detectFields(Buffer.from('not-a-pdf'));
      expect(result).toEqual([]);
    } finally {
      // Restore the stub even when the call or the assertion throws, so the
      // rejecting implementation cannot leak into other tests.
      mutableMod.getDocument = orig;
    }
  });
});
|