feat(documenso-phase-4): recipient configurator + field placement UI
Phase 4 lands the visual half of the Documenso build — the upload-
for-signing dialog the Contract + Reservation tabs hand off to. Four
files of new code; the existing tab placeholders point at it.
Files added:
- lib/services/document-field-detector.ts — Phase 4c auto-detect
scanner. Uses pdfjs-dist to extract per-page text + positions, then
matches anchor patterns (Signature, Date, Initials, Email, Name,
underscore-runs) and produces percent-coordinate DetectedField
rows. Recipient label inference walks ±100pt of each match for
Buyer/Seller/Client/Witness/Notary keywords. Returns [] when the
PDF is image-only; UI falls back to manual placement without an
error. 6 unit tests pin the matching + coordinate math.
- app/api/v1/documents/auto-detect-fields/route.ts — multipart POST
endpoint that delegates to detectFields(). Permission-gated by
documents.send_for_signing.
- app/api/v1/documents/signing-defaults/route.ts — GET endpoint that
surfaces just the per-port developer + approver display name/email
+ sendMode flag. No secrets exposed; lets the dialog prefill the
recipient configurator without an admin-scoped settings read.
- components/documents/upload-for-signing-dialog.tsx — the Phase 4
UI. Three-step state machine inside a single Dialog:
1. select-file: drop/click PDF picker + title input
2. configure-recipients: client + developer + approver prefilled,
rep can add/remove/reorder + change role (SIGNER/APPROVER/CC)
3. place-fields: react-pdf renders the source PDF; auto-detect
runs in the background on file load and seeds the overlay;
rep places, drags, resizes, deletes, reassigns fields via the
palette + side panel. Native DOM drag (no dnd-kit dependency
added — the coordinate math stays obvious).
Send fires POST /api/v1/interests/[id]/upload-for-signing (Phase 3
service); success toast reflects port sendMode (auto fires the
invite immediately, manual leaves it for the rep).
Files modified:
- components/interests/interest-contract-tab.tsx + reservation-tab.tsx:
swap the ComingSoonDialog placeholder for the real
UploadForSigningDialog with the matching documentType prop. The
placeholder ComingSoonDialog helper is deleted from both.
- scripts/tsc-staged.mjs: pull src/types/**/*.d.ts into the temp
staged-only tsconfig so side-effect CSS imports (e.g.
react-pdf/dist/Page/AnnotationLayer.css) resolve via the existing
declare-module shim. Without this fix the staged compile reports
TS2882 even though the full `tsc --noEmit` run passes.
Design choices noted in code comments:
- Native drag over dnd-kit: the field overlay's percent-based
coordinate math is short enough that adding a drag library adds
complexity without saving lines.
- Auto-detect on file-load (not on demand): runs immediately so the
rep doesn't have to click a second button — empty result drops
back to manual placement silently.
- Per-recipient color swatches indexed by signingOrder.
- Recipient seed via useMemo + user-event handler instead of
useEffect → setRecipients (Wave 3 set-state-in-effect avoidance).
Server-side, Phase 3 plumbing handles the rest: tenant guard, magic-
byte verify, Documenso round-trip with per-port v1/v2 routing,
recipient signingToken capture for Phase 2 webhook cascade, auto-
send when port.sendMode === 'auto'.
Tests: 1334 → 1340 ✅ (6 new for the detector); tsc clean.
Deferred polish (Phase 6):
- Per-field metadata side panel for DROPDOWN/RADIO option lists
- Pinch-zoom + zoom-out controls on the field-placement canvas
- Recipient drag-reorder via dnd-kit
- Required toggle per field
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,13 @@ const tmpConfig = join(tmpDir, 'tsconfig.json');
|
||||
|
||||
const relFiles = files.map((f) => relative(tmpDir, resolve(cwd, f)));
|
||||
|
||||
// Pull in the project's ambient .d.ts files (css module shim,
|
||||
// react-pdf JSX augment, etc.) so side-effect imports like
|
||||
// `import 'react-pdf/dist/Page/AnnotationLayer.css'` resolve under the
|
||||
// staged-only compile. Without this, `include: []` would shut out
|
||||
// everything in src/types/ and tsc reports TS2882 for any CSS import.
|
||||
const ambientTypesGlob = relative(tmpDir, join(cwd, 'src/types')) + '/**/*.d.ts';
|
||||
|
||||
writeFileSync(
|
||||
tmpConfig,
|
||||
JSON.stringify(
|
||||
@@ -50,7 +57,7 @@ writeFileSync(
|
||||
types: ['node', 'react', 'react-dom'],
|
||||
},
|
||||
files: relFiles,
|
||||
include: [],
|
||||
include: [ambientTypesGlob],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
|
||||
50
src/app/api/v1/documents/auto-detect-fields/route.ts
Normal file
50
src/app/api/v1/documents/auto-detect-fields/route.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
import { errorResponse, ValidationError } from '@/lib/errors';
|
||||
import { detectFields } from '@/lib/services/document-field-detector';
|
||||
import { isPdfMagic } from '@/lib/services/berth-pdf-parser';
|
||||
|
||||
/** Largest PDF the detector accepts, in bytes (50 MiB). */
const MAX_PDF_BYTES = 50 * 1024 * 1024;

/**
 * Allowance for multipart boundaries + part headers when vetting the
 * request's declared Content-Length against MAX_PDF_BYTES. The
 * per-file `file.size` check below remains the authoritative cap.
 */
const MULTIPART_OVERHEAD_BYTES = 64 * 1024;

/**
 * Phase 4 — Auto-detect anchor scanner endpoint.
 *
 * POST `/api/v1/documents/auto-detect-fields`
 *
 * Body: multipart/form-data
 *   - file: the source PDF the rep just uploaded
 *
 * Returns: `{ data: { fields: DetectedField[] } }` — seed state for the
 * drag-drop overlay. Empty array when the PDF has no extractable text
 * (image-only scan) — the dialog falls back to manual placement
 * without an error toast.
 *
 * Rejects with a ValidationError (rendered through `errorResponse`)
 * when the `file` part is missing, exceeds MAX_PDF_BYTES, or does not
 * start with the PDF magic bytes.
 *
 * Permission: documents.send_for_signing — the only flow that calls
 * this endpoint is the upload-for-signing dialog, which already
 * requires that bit. Reusing it here means a custom role with the
 * upload bit but no send bit can't dry-run the detector to pull
 * structural metadata out of a contract before sending.
 */
export const POST = withAuth(
  withPermission('documents', 'send_for_signing', async (req) => {
    try {
      // Cheap pre-check: `formData()` buffers the whole body, so refuse
      // requests that already declare an oversized payload before
      // reading it. A missing header parses to 0 and falls through to
      // the per-file size check after parsing.
      const declaredLength = Number(req.headers.get('content-length'));
      if (
        Number.isFinite(declaredLength) &&
        declaredLength > MAX_PDF_BYTES + MULTIPART_OVERHEAD_BYTES
      ) {
        throw new ValidationError(`File exceeds ${MAX_PDF_BYTES / 1024 / 1024} MB cap`);
      }

      const form = await req.formData();
      const file = form.get('file');
      if (!file || !(file instanceof File)) {
        throw new ValidationError('Missing file');
      }
      if (file.size > MAX_PDF_BYTES) {
        throw new ValidationError(`File exceeds ${MAX_PDF_BYTES / 1024 / 1024} MB cap`);
      }

      // Verify the magic bytes before handing the buffer to the parser
      // so non-PDF uploads fail with a clear validation message.
      const buffer = Buffer.from(await file.arrayBuffer());
      if (!isPdfMagic(buffer)) {
        throw new ValidationError('Uploaded file is not a PDF');
      }

      const fields = await detectFields(buffer);
      return NextResponse.json({ data: { fields } });
    } catch (error) {
      return errorResponse(error);
    }
  }),
);
|
||||
43
src/app/api/v1/documents/signing-defaults/route.ts
Normal file
43
src/app/api/v1/documents/signing-defaults/route.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
import { errorResponse } from '@/lib/errors';
|
||||
import { getPortDocumensoConfig } from '@/lib/services/port-config';
|
||||
|
||||
/**
 * GET `/api/v1/documents/signing-defaults`
 *
 * Supplies the per-port developer + approver defaults that the
 * UploadForSigningDialog uses to prefill its recipient configurator.
 * The payload is limited to display name, email, label and the port's
 * sendMode flag (which lets the UI pick its CTA copy — "Send now" vs
 * "Save draft, send manually"); nothing else from the port's
 * Documenso config is returned.
 *
 * Missing names/emails are returned as empty strings; missing labels
 * fall back to "Developer" / "Approver".
 *
 * Permission: documents.send_for_signing — the only caller is the
 * upload-for-signing dialog, which already needs this permission to
 * complete the flow.
 */
export const GET = withAuth(
  withPermission('documents', 'send_for_signing', async (_req, ctx) => {
    try {
      const portConfig = await getPortDocumensoConfig(ctx.portId);

      const developer = {
        name: portConfig.developerName ?? '',
        email: portConfig.developerEmail ?? '',
        label: portConfig.developerLabel ?? 'Developer',
      };
      const approver = {
        name: portConfig.approverName ?? '',
        email: portConfig.approverEmail ?? '',
        label: portConfig.approverLabel ?? 'Approver',
      };

      return NextResponse.json({
        data: { developer, approver, sendMode: portConfig.sendMode },
      });
    } catch (error) {
      return errorResponse(error);
    }
  }),
);
|
||||
1057
src/components/documents/upload-for-signing-dialog.tsx
Normal file
1057
src/components/documents/upload-for-signing-dialog.tsx
Normal file
File diff suppressed because it is too large
Load Diff
@@ -20,6 +20,7 @@ import { Button } from '@/components/ui/button';
|
||||
import { Skeleton } from '@/components/ui/skeleton';
|
||||
import { ExternalEoiUploadDialog } from '@/components/interests/external-eoi-upload-dialog';
|
||||
import { SigningProgress } from '@/components/documents/signing-progress';
|
||||
import { UploadForSigningDialog } from '@/components/documents/upload-for-signing-dialog';
|
||||
import { apiFetch } from '@/lib/api/client';
|
||||
import { toastError } from '@/lib/api/toast-error';
|
||||
import { useConfirmation } from '@/hooks/use-confirmation';
|
||||
@@ -168,16 +169,16 @@ export function InterestContractTab({ interestId, clientId: _clientId }: Interes
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Upload-for-Documenso-signing dialog placeholder. The real
|
||||
dialog (PDF picker + recipient configurator + send button)
|
||||
is part of the larger custom-doc-upload service that's a
|
||||
follow-up. For now show a friendly "coming soon" card. */}
|
||||
{/* Phase 4 — upload-for-Documenso-signing dialog. Multi-step
|
||||
(file → recipients → fields → send) backed by the Phase 3
|
||||
service. Auto-detect runs after the file lands; rep can
|
||||
tweak placements before sending. */}
|
||||
{uploadForSigningOpen && (
|
||||
<ComingSoonDialog
|
||||
<UploadForSigningDialog
|
||||
open={uploadForSigningOpen}
|
||||
onOpenChange={setUploadForSigningOpen}
|
||||
title="Send contract for signing"
|
||||
body="Upload-and-send-via-Documenso for contracts is being built. For now, draft the contract externally, get it signed via paper or another tool, then upload the signed copy here."
|
||||
interestId={interestId}
|
||||
documentType="contract"
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
@@ -381,44 +382,3 @@ function StatusBadge({ status }: { status: DocumentRow['status'] }) {
|
||||
</Badge>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Placeholder for the upload-for-Documenso-signing flow until the
|
||||
* full upload + recipient + field-placement service is shipped.
|
||||
* Intentional dead-end so reps know the path exists rather than
|
||||
* misclicking and getting confusing behaviour.
|
||||
*/
|
||||
function ComingSoonDialog({
|
||||
open,
|
||||
onOpenChange,
|
||||
title,
|
||||
body,
|
||||
}: {
|
||||
open: boolean;
|
||||
onOpenChange: (next: boolean) => void;
|
||||
title: string;
|
||||
body: string;
|
||||
}) {
|
||||
if (!open) return null;
|
||||
return (
|
||||
<div
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4"
|
||||
onClick={() => onOpenChange(false)}
|
||||
>
|
||||
<div
|
||||
className="max-w-md rounded-lg border bg-background p-6 shadow-lg"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<h3 className="text-base font-semibold text-foreground">{title}</h3>
|
||||
<p className="mt-2 text-sm text-muted-foreground">{body}</p>
|
||||
<div className="mt-4 flex justify-end">
|
||||
<Button onClick={() => onOpenChange(false)} size="sm" variant="outline">
|
||||
Got it
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import { Button } from '@/components/ui/button';
|
||||
import { Skeleton } from '@/components/ui/skeleton';
|
||||
import { ExternalEoiUploadDialog } from '@/components/interests/external-eoi-upload-dialog';
|
||||
import { SigningProgress } from '@/components/documents/signing-progress';
|
||||
import { UploadForSigningDialog } from '@/components/documents/upload-for-signing-dialog';
|
||||
import { apiFetch } from '@/lib/api/client';
|
||||
import { toastError } from '@/lib/api/toast-error';
|
||||
import { useConfirmation } from '@/hooks/use-confirmation';
|
||||
@@ -171,16 +172,13 @@ export function InterestReservationTab({
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Upload-for-Documenso-signing dialog placeholder. The real
|
||||
dialog (PDF picker + recipient configurator + send button)
|
||||
is part of the larger custom-doc-upload service that's a
|
||||
follow-up. For now show a friendly "coming soon" card. */}
|
||||
{/* Phase 4 — upload-for-Documenso-signing dialog. */}
|
||||
{uploadForSigningOpen && (
|
||||
<ComingSoonDialog
|
||||
<UploadForSigningDialog
|
||||
open={uploadForSigningOpen}
|
||||
onOpenChange={setUploadForSigningOpen}
|
||||
title="Send reservation for signing"
|
||||
body="Upload-and-send-via-Documenso for contracts is being built. For now, draft the reservation externally, get it signed via paper or another tool, then upload the signed copy here."
|
||||
interestId={interestId}
|
||||
documentType="reservation_agreement"
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
@@ -384,44 +382,3 @@ function StatusBadge({ status }: { status: DocumentRow['status'] }) {
|
||||
</Badge>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Placeholder for the upload-for-Documenso-signing flow until the
|
||||
* full upload + recipient + field-placement service is shipped.
|
||||
* Intentional dead-end so reps know the path exists rather than
|
||||
* misclicking and getting confusing behaviour.
|
||||
*/
|
||||
function ComingSoonDialog({
|
||||
open,
|
||||
onOpenChange,
|
||||
title,
|
||||
body,
|
||||
}: {
|
||||
open: boolean;
|
||||
onOpenChange: (next: boolean) => void;
|
||||
title: string;
|
||||
body: string;
|
||||
}) {
|
||||
if (!open) return null;
|
||||
return (
|
||||
<div
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4"
|
||||
onClick={() => onOpenChange(false)}
|
||||
>
|
||||
<div
|
||||
className="max-w-md rounded-lg border bg-background p-6 shadow-lg"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<h3 className="text-base font-semibold text-foreground">{title}</h3>
|
||||
<p className="mt-2 text-sm text-muted-foreground">{body}</p>
|
||||
<div className="mt-4 flex justify-end">
|
||||
<Button onClick={() => onOpenChange(false)} size="sm" variant="outline">
|
||||
Got it
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
297
src/lib/services/document-field-detector.ts
Normal file
297
src/lib/services/document-field-detector.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
/**
|
||||
* Phase 4c — Auto-detect anchor scanner.
|
||||
*
|
||||
* Scans a PDF for common signing-block keywords ("Signature:", "Date:",
|
||||
* "Initials", a long run of underscores, etc.) and proposes Documenso
|
||||
* field placements positioned right after the matched anchor. Output
|
||||
* is in PERCENT coordinates so it lines up with the existing
|
||||
* `DocumensoFieldPlacement` shape consumed by the Phase 3 service.
|
||||
*
|
||||
* Confidence calculation is conservative: an explicit keyword match
|
||||
* scores higher than a generic underscore-run; the field-type-specific
|
||||
* regexes are tried in priority order so a `"Date of Signature:"`
|
||||
* anchor doesn't double-place as both DATE and SIGNATURE.
|
||||
*
|
||||
* This is intentionally pdf-content driven (text-extraction based) —
|
||||
* the alternative (image-of-PDF + OCR) is the bigger berth-PDF parser
|
||||
* tier-3 path; we keep this lightweight so it runs in <500ms on a
|
||||
* 10-page contract.
|
||||
*/
|
||||
|
||||
import type { DocumensoFieldType } from '@/lib/services/documenso-client';
|
||||
|
||||
/**
 * One proposed field placement, emitted once per text item that
 * matched an anchor pattern.
 */
export interface DetectedField {
  /** Field type taken from the anchor pattern that matched. */
  type: DocumensoFieldType;
  /** Page the anchor was found on (1-indexed). */
  pageNumber: number;
  /**
   * Box position + size, each expressed as a percentage (0-100) of the
   * page's width (X / width) or height (Y / height).
   * NOTE(review): `detectFields` derives `pageY` from the text baseline
   * in PDF user space (origin at the bottom-left) — confirm consumers
   * expect that origin rather than a top-left one.
   */
  pageX: number;
  pageY: number;
  pageWidth: number;
  pageHeight: number;
  /** Scanner certainty on a 0..1 scale. */
  confidence: number;
  /** Trimmed text of the item that triggered the detection (display + debug). */
  anchorText: string;
  /**
   * Recipient label inferred from text near the anchor — one of the
   * `label` values in `RECIPIENT_LABELS`, or `null` when no nearby
   * keyword matched. Phase 4d maps these to recipients by role/name.
   */
  inferredRecipientLabel?: string | null;
}
|
||||
|
||||
/** One row of the anchor → field-type pattern table. */
interface AnchorPattern {
  type: DocumensoFieldType;
  /**
   * Tested against the LOWER-CASED text of a PDF text item, so every
   * pattern must be case-insensitive (or written in lower case).
   */
  match: RegExp;
  /** Suggested field box in PDF points (72 dpi). Converted to percent
   *  per-page after extraction. */
  widthPt: number;
  heightPt: number;
  /** Bias added to the base confidence. Specific keywords get a bump
   *  over the generic underscore catch-all. */
  confidenceBoost: number;
}

/**
 * Anchor → field-type pattern table. Order matters: the first pattern
 * that matches a text item wins, so earlier patterns shadow later ones
 * (e.g. "Date of Signature" matches both DATE and SIGNATURE — DATE goes
 * first because it's the more specific pattern).
 *
 * Every keyword pattern is anchored with `(?:^|\b)` so it only fires at
 * the start of a word; without that, body text such as "update " or
 * "candidate " would be picked up as a DATE anchor.
 */
const ANCHOR_PATTERNS: AnchorPattern[] = [
  // DATE — more specific than SIGNATURE for the common "Date of
  // Signature:" case, so listed first.
  {
    type: 'DATE',
    match: /(?:^|\b)(?:dated|date(?:\s+of\s+signature)?)[:\s_-]+/i,
    widthPt: 80,
    heightPt: 20,
    confidenceBoost: 0.2,
  },
  // INITIALS — pre-empts NAME because "Initial:" is short and unique.
  {
    type: 'INITIALS',
    match: /(?:^|\b)(?:initials?)[:\s_-]+/i,
    widthPt: 50,
    heightPt: 30,
    confidenceBoost: 0.2,
  },
  // EMAIL — explicit email anchor.
  {
    type: 'EMAIL',
    match: /(?:^|\b)e-?mail[:\s_-]+/i,
    widthPt: 200,
    heightPt: 20,
    confidenceBoost: 0.2,
  },
  // NAME — printed/full name labels.
  {
    type: 'NAME',
    match: /(?:^|\b)(?:printed\s*)?(?:full\s+)?name[:\s_-]+/i,
    widthPt: 150,
    heightPt: 20,
    confidenceBoost: 0.15,
  },
  // SIGNATURE — broadest of the signing-block patterns.
  {
    type: 'SIGNATURE',
    match: /(?:^|\b)(?:signature|sign\s*here|signed\s*by|signed\s*at)[:\s_-]+/i,
    widthPt: 150,
    heightPt: 30,
    confidenceBoost: 0.2,
  },
  // SIGNATURE — explicit "X" mark followed by a blank line. Must be
  // case-insensitive: the input is lower-cased before matching, so a
  // literal upper-case `X` could never match. The word-start anchor
  // keeps words that merely end in "x" ("fax ____") from matching.
  {
    type: 'SIGNATURE',
    match: /(?:^|\b)x\s*_{4,}/i,
    widthPt: 150,
    heightPt: 30,
    confidenceBoost: 0.15,
  },
  // Catch-all: a run of underscores not preceded by a more specific
  // keyword (which would have matched above). Defaults to TEXT.
  {
    type: 'TEXT',
    match: /_{8,}/,
    widthPt: 200,
    heightPt: 20,
    confidenceBoost: 0,
  },
];
|
||||
|
||||
/**
 * Recipient labels the detector can infer, each with the lower-cased
 * keywords that map to it. `inferRecipient` walks this list top to
 * bottom and returns the first label with an alias that occurs as a
 * substring of the nearby text, so list position is the priority
 * (e.g. "Buyer Notary" resolves to Notary, not Buyer).
 *
 * NOTE(review): because earlier entries win, some later entries are
 * shadowed — "witness" is claimed by Notary so `Witness` is never
 * returned; "seller"/"vendor" are claimed by Developer so `Seller` is
 * never returned; "client" is claimed by Buyer so `Client` is only
 * reachable via "customer". Confirm whether that collapse is intended
 * or whether the overlapping aliases should be removed.
 */
const RECIPIENT_LABELS: { label: string; aliases: string[] }[] = [
  { label: 'Notary', aliases: ['notary', 'witness'] },
  { label: 'Witness', aliases: ['witness'] },
  { label: 'Developer', aliases: ['developer', 'seller', 'vendor'] },
  { label: 'Approver', aliases: ['approver', 'manager'] },
  { label: 'Buyer', aliases: ['buyer', 'purchaser', 'client'] },
  { label: 'Seller', aliases: ['seller', 'vendor'] },
  { label: 'Client', aliases: ['client', 'customer'] },
];
|
||||
|
||||
/**
 * The subset of a pdfjs-dist text item that the detector reads.
 *
 * `transform` is the PDF affine matrix `[a, b, c, d, e, f]`, i.e.
 * `[scaleX, skewY, skewX, scaleY, translateX, translateY]`. The
 * detector treats `(e, f)` = `(translateX, translateY)` as the lower-
 * left corner of the anchor text.
 */
interface PdfTextItem {
  /** Text content of the item. */
  str: string;
  /** Affine matrix; indices 4 and 5 carry the position. */
  transform: number[];
  /** Width of the item in PDF user-space units, when pdfjs supplies it. */
  width?: number;
  /** Height of the item — usually the same as scaleY. */
  height?: number;
}
|
||||
|
||||
/**
 * Text + dimensions extracted from a single PDF page; one entry per
 * page is produced by `extractPdfPages`.
 */
interface PdfPageView {
  /** 1-indexed page number. */
  pageNumber: number;
  /** Page width, taken from the pdfjs viewport at scale 1. */
  widthPt: number;
  /** Page height, taken from the pdfjs viewport at scale 1. */
  heightPt: number;
  /** Text items on the page that passed the `isPdfTextItem` check. */
  items: PdfTextItem[];
}
|
||||
|
||||
/**
 * Detect signing-block fields in a PDF. Each detection points at the
 * position immediately after the matched anchor text and is offset 5pt
 * to the right so the placeholder doesn't visually overlap the
 * keyword.
 *
 * Every returned box lies fully inside the page: detections whose
 * origin falls outside 0-95% are dropped, and width/height are clamped
 * so `pageX + pageWidth` and `pageY + pageHeight` never exceed 100.
 *
 * @param pdfBuffer Raw bytes of the PDF to scan.
 * @returns One `DetectedField` per text item that matched an anchor
 *   pattern (first matching pattern wins per item). Empty when the PDF
 *   yields no pages or no matching text (e.g. image-only scans) — the
 *   caller should fall back to manual placement in that case.
 */
export async function detectFields(pdfBuffer: Buffer): Promise<DetectedField[]> {
  const pages = await extractPdfPages(pdfBuffer);
  const detected: DetectedField[] = [];

  for (const page of pages) {
    // A page without usable dimensions would turn every percentage into
    // NaN/Infinity — and NaN passes all of the `<` / `>` range guards
    // below — so skip such pages outright.
    const hasUsableSize =
      Number.isFinite(page.widthPt) &&
      Number.isFinite(page.heightPt) &&
      page.widthPt > 0 &&
      page.heightPt > 0;
    if (!hasUsableSize) continue;

    for (const item of page.items) {
      // Skip if the item has no positional data — defensive against
      // exotic PDF encodings.
      if (!Array.isArray(item.transform) || item.transform.length < 6) continue;
      const translateX = Number(item.transform[4]);
      const translateY = Number(item.transform[5]);
      if (!Number.isFinite(translateX) || !Number.isFinite(translateY)) continue;

      const lower = item.str.toLowerCase();

      for (const pattern of ANCHOR_PATTERNS) {
        if (!pattern.match.test(lower)) continue;

        // Place the field immediately after the anchor with a 5pt
        // horizontal offset. The anchor's width is approximate; pdfjs
        // sometimes gives a too-small width for short tokens so we
        // floor at 30pt to avoid the field landing on top of the text.
        const anchorWidthPt = Math.max(30, item.width ?? lower.length * 5);
        const fieldXPt = translateX + anchorWidthPt + 5;
        // transform[5] is the text baseline in PDF user space, whose
        // origin is the lower-left corner, so pageY is measured from
        // the BOTTOM of the page.
        // NOTE(review): CSS/web overlays use a top-left origin —
        // confirm the consumer (overlay / Documenso placement helper)
        // flips the Y axis; nothing in this function does.
        const fieldYPt = translateY;

        const pageX = (fieldXPt / page.widthPt) * 100;
        const pageY = (fieldYPt / page.heightPt) * 100;

        // Hard-skip fields that would land off-page (defensive — a
        // misparsed transform can blow up the coordinate space).
        if (pageX < 0 || pageX > 95 || pageY < 0 || pageY > 95) continue;

        // Clamp the box so it cannot spill past the page edge when the
        // anchor sits close to the margin.
        const pageWidth = Math.min((pattern.widthPt / page.widthPt) * 100, 100 - pageX);
        const pageHeight = Math.min((pattern.heightPt / page.heightPt) * 100, 100 - pageY);
        if (pageWidth <= 0 || pageHeight <= 0) continue;

        const recipientLabel = inferRecipient(page.items, item, translateX, translateY);

        detected.push({
          type: pattern.type,
          pageNumber: page.pageNumber,
          pageX,
          pageY,
          pageWidth,
          pageHeight,
          confidence: 0.5 + pattern.confidenceBoost,
          anchorText: item.str.trim(),
          inferredRecipientLabel: recipientLabel,
        });
        // First matching pattern wins for this item — earlier
        // (more-specific) patterns shadow later ones.
        break;
      }
    }
  }
  return detected;
}
|
||||
|
||||
/**
 * Look through the page's other text items within ±100pt (on both
 * axes) of the anchor and return the recipient label of the CLOSEST
 * item that contains a recipient keyword. Used to seed the recipient
 * assignment side-panel; the rep can override.
 *
 * Picking the closest match (rather than the first one in content-
 * stream order) keeps side-by-side signature blocks from being
 * attributed to whichever party's label happens to come first in the
 * PDF. Ties keep the earlier item.
 *
 * @param items   All text items on the anchor's page.
 * @param anchor  The matched anchor item; excluded from the search.
 * @param anchorX Anchor X position in PDF points.
 * @param anchorY Anchor Y position in PDF points.
 * @returns The matching `RECIPIENT_LABELS` label, or `null` when no
 *   item in range contains a known keyword.
 */
function inferRecipient(
  items: PdfTextItem[],
  anchor: PdfTextItem,
  anchorX: number,
  anchorY: number,
): string | null {
  const RADIUS = 100;
  let bestLabel: string | null = null;
  let bestDistanceSq = Number.POSITIVE_INFINITY;

  for (const candidate of items) {
    if (candidate === anchor) continue;
    if (!Array.isArray(candidate.transform) || candidate.transform.length < 6) continue;
    const cx = Number(candidate.transform[4]);
    const cy = Number(candidate.transform[5]);
    if (!Number.isFinite(cx) || !Number.isFinite(cy)) continue;

    const dx = cx - anchorX;
    const dy = cy - anchorY;
    if (Math.abs(dx) > RADIUS || Math.abs(dy) > RADIUS) continue;

    // Only bother with the keyword scan when this item could beat the
    // current best; `>=` keeps the earlier item on a tie.
    const distanceSq = dx * dx + dy * dy;
    if (distanceSq >= bestDistanceSq) continue;

    const lower = candidate.str.toLowerCase();
    // Table order is the priority order: first label with a matching
    // alias wins for this item.
    const hit = RECIPIENT_LABELS.find(({ aliases }) =>
      aliases.some((alias) => lower.includes(alias)),
    );
    if (hit) {
      bestLabel = hit.label;
      bestDistanceSq = distanceSq;
    }
  }
  return bestLabel;
}
|
||||
|
||||
/**
 * Extract per-page text + page dimensions from a PDF buffer. Uses
 * pdfjs-dist (the same library powering react-pdf in the dialog),
 * imported dynamically so the dependency only loads when the detector
 * actually runs.
 *
 * The pdfjs loading task is always destroyed before returning so the
 * parsed document is released even when a page fails part-way through.
 *
 * @param pdfBuffer Raw bytes of the PDF.
 * @returns One `PdfPageView` per page, in page order. Returns an empty
 *   array if anything in the load/parse path throws — the rep gets the
 *   manual placement flow without an error toast.
 */
export async function extractPdfPages(pdfBuffer: Buffer): Promise<PdfPageView[]> {
  // Held outside the try so the finally block can release it.
  let loadingTask: { destroy: () => Promise<void> } | undefined;
  try {
    // pdfjs-dist 5.x ships a legacy ESM build that works in Node + Next
    // server bundles without the worker wiring needed in the browser.
    const pdfjsLib = await import('pdfjs-dist/legacy/build/pdf.mjs');
    const data = new Uint8Array(pdfBuffer);
    const task = pdfjsLib.getDocument({ data });
    loadingTask = task;
    const pdf = await task.promise;

    const pages: PdfPageView[] = [];
    for (let i = 1; i <= pdf.numPages; i++) {
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: 1 });
      const content = await page.getTextContent();
      // Keep only entries that look like positioned text items.
      const items = (content.items as Array<unknown>).filter(isPdfTextItem);
      pages.push({
        pageNumber: i,
        widthPt: viewport.width,
        heightPt: viewport.height,
        items,
      });
    }
    return pages;
  } catch {
    // Deliberate best-effort: any failure to import pdfjs or to parse
    // the document (e.g. a corrupt PDF) lands here. The dialog falls
    // back to manual placement — no rep-facing error needed.
    return [];
  } finally {
    // Cleanup must never change the outcome, so swallow any failure.
    try {
      await loadingTask?.destroy();
    } catch {
      // ignore — nothing useful to do if teardown fails
    }
  }
}
|
||||
|
||||
/**
 * Type guard for raw pdfjs text-content entries: accepts any non-null
 * object that carries a string `str` and an array `transform`. The
 * array's length and contents are not inspected here.
 */
function isPdfTextItem(item: unknown): item is PdfTextItem {
  if (typeof item !== 'object' || item === null) return false;
  const record = item as Record<string, unknown>;
  if (typeof record.str !== 'string') return false;
  return Array.isArray(record.transform);
}
|
||||
107
tests/unit/services/document-field-detector.test.ts
Normal file
107
tests/unit/services/document-field-detector.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
|
||||
// Stub pdfjs-dist before the service is imported. The detector loads
// `pdfjs-dist/legacy/build/pdf.mjs` through a dynamic import; this
// factory replaces it with a fake single-page document whose text
// content is canned, so the anchor-matching + coordinate-conversion
// logic can be asserted without a real PDF.
//
// All helpers live INSIDE the factory on purpose: Vitest hoists
// `vi.mock` calls, so the factory must not close over module-level
// bindings declared further down the file.
vi.mock('pdfjs-dist/legacy/build/pdf.mjs', () => {
  // Builds a text item with an identity scale positioned at (x, y).
  const textItem = (str: string, x: number, y: number, width: number) => ({
    str,
    transform: [1, 0, 0, 1, x, y],
    width,
  });

  const fakePage = () => ({
    // A4 in pt
    getViewport: ({ scale: _s }: { scale: number }) => ({ width: 595, height: 842 }),
    getTextContent: async () => ({
      items: [
        // Item 0: a signature anchor near the bottom-left
        textItem('Signature: ', 50, 100, 70),
        // Item 1: a date anchor next to it
        textItem('Date: ', 250, 100, 40),
        // Item 2: recipient label nearby
        textItem('Buyer', 50, 130, 40),
        // Item 3: unrelated body text (should not match)
        textItem('The parties hereby agree…', 50, 200, 200),
      ],
    }),
  });

  return {
    getDocument: (_opts: unknown) => ({
      promise: Promise.resolve({
        numPages: 1,
        getPage: async (_n: number) => fakePage(),
      }),
    }),
  };
});
|
||||
|
||||
import { detectFields } from '@/lib/services/document-field-detector';
|
||||
|
||||
// Exercises detectFields against the canned single-page document
// provided by the pdfjs-dist mock (Signature / Date anchors, a "Buyer"
// label and one line of unrelated body text).
describe('detectFields', () => {
  it('returns matches for known anchors with the right type + page', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    expect(result.length).toBeGreaterThanOrEqual(2);
    const sig = result.find((r) => r.type === 'SIGNATURE');
    const date = result.find((r) => r.type === 'DATE');
    expect(sig).toBeDefined();
    expect(date).toBeDefined();
    expect(sig?.pageNumber).toBe(1);
    expect(date?.pageNumber).toBe(1);
  });

  it('infers recipient label from nearby text', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.inferredRecipientLabel).toBe('Buyer');
  });

  it('returns percent coordinates in [0, 100]', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    for (const f of result) {
      expect(f.pageX).toBeGreaterThanOrEqual(0);
      expect(f.pageX).toBeLessThanOrEqual(100);
      expect(f.pageY).toBeGreaterThanOrEqual(0);
      expect(f.pageY).toBeLessThanOrEqual(100);
      expect(f.pageWidth).toBeGreaterThan(0);
      expect(f.pageHeight).toBeGreaterThan(0);
    }
  });

  it('attaches the anchor text + a confidence score', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    const sig = result.find((r) => r.type === 'SIGNATURE');
    expect(sig?.anchorText).toMatch(/signature/i);
    expect(sig?.confidence).toBeGreaterThan(0.5);
    expect(sig?.confidence).toBeLessThanOrEqual(1);
  });

  it('does not match body text that lacks a signing-block keyword', async () => {
    const result = await detectFields(Buffer.from('%PDF-1.7'));
    // The "The parties hereby agree" item should not produce a TEXT
    // detection (no underscore run, no keyword).
    expect(result.find((r) => r.anchorText?.includes('parties'))).toBeUndefined();
  });

  it('gracefully returns [] when pdfjs throws', async () => {
    // Force pdfjs to reject for this one call.
    const mod = await import('pdfjs-dist/legacy/build/pdf.mjs');
    const orig = mod.getDocument;
    const mutable = mod as unknown as { getDocument: typeof orig };
    mutable.getDocument = () =>
      ({ promise: Promise.reject(new Error('boom')) }) as ReturnType<typeof orig>;
    try {
      const result = await detectFields(Buffer.from('not-a-pdf'));
      expect(result).toEqual([]);
    } finally {
      // Restore even when the assertion fails, so the rejecting stub
      // cannot leak into other tests sharing this mocked module.
      mutable.getDocument = orig;
    }
  });
});
|
||||
Reference in New Issue
Block a user