diff --git a/src/lib/services/berth-pdf-parser.ts b/src/lib/services/berth-pdf-parser.ts
index b64a6567..c28a53b2 100644
--- a/src/lib/services/berth-pdf-parser.ts
+++ b/src/lib/services/berth-pdf-parser.ts
@@ -178,49 +178,52 @@ async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {
   };
 }
 
-// ─── tier 2: OCR via Tesseract ───────────────────────────────────────────────
+// ─── tier 2: text extraction via unpdf ────────────────────────────────────────
 
 /**
- * Runs Tesseract against a PDF rasterized to one image per page. Tesseract.js
- * accepts image inputs; we use a lazy `pdfjs-dist`-style rasterization fallback
- * via dynamic import. To keep the parser unit-testable without a WASM bundle,
- * the actual recognize() call is encapsulated in the `runOcr` adapter that
- * production wires to tesseract.js and tests can stub.
+ * Tier-2 extracts text directly from the PDF via `unpdf` (a serverless-
+ * friendly pdfjs wrapper). This works for text-PDFs — i.e. PDFs that
+ * contain real text streams, not scanned page images. Scanned/raster PDFs
+ * land here with empty extracted text and fall through to the AI tier.
+ *
+ * The earlier design called for tesseract.js rasterization, but
+ * `tesseract.recognize` doesn't accept a PDF buffer — it expects an image.
+ * That old code path silently failed at runtime; unpdf is the correct
+ * primitive for "pull text out of a PDF on the server."
+ *
+ * The `recognize` adapter shape is retained for backward compatibility with
+ * the test suite and the `parseAnyBerthPdf(buffer, { adapter })` override.
  */
 export interface OcrAdapter {
-  /** Returns plain text + a 0..100 mean confidence score. */
+  /** Returns plain text + a mean confidence score. NOTE(review): the default adapter returns 0 or 90 (0..100 scale, not 0..1) — confirm the scale consumers expect. */
   recognize(buffer: Buffer): Promise<{ text: string; confidence: number }>;
 }
 
-/** Hard cap on Tesseract OCR runtime. A crafted PDF rasterizing to
- *  high-resolution noise can pin the process indefinitely (CPU bomb).
- *  30 seconds covers the legitimate single-page-spec case by a wide
- *  margin while bounding the worst-case worker hold-time. The AI
- *  fallback tier handles cases where OCR couldn't finish. */
+/** Hard cap on how long tier-2 extraction is awaited. A crafted PDF could
+ *  pathologically slow pdfjs parsing; 30s covers any reasonable berth-spec by
+ *  orders of magnitude. The adapter races extraction against this timeout, which frees the awaiter; no cancellation signal is passed to unpdf. */
 const OCR_TIMEOUT_MS = 30_000;
 
-/** Default adapter — dynamically imports tesseract.js so the WASM bundle isn't
- *  pulled into client builds. */
 async function defaultOcrAdapter(): Promise<OcrAdapter> {
   return {
     recognize: async (buffer: Buffer) => {
-      const tesseract = await import('tesseract.js');
-      // Race the OCR against a timeout so a runaway recognition can't
-      // hold the worker forever. The race-loser pattern doesn't
-      // actually cancel Tesseract (no AbortController support), but it
-      // does free the awaiter so the caller can fall through to AI.
+      const { extractText } = await import('unpdf');
       let timeoutHandle: NodeJS.Timeout | undefined;
       const timeout = new Promise<{ text: string; confidence: number }>((_, reject) => {
         timeoutHandle = setTimeout(
-          () => reject(new Error(`Tesseract OCR exceeded ${OCR_TIMEOUT_MS}ms timeout`)),
+          () => reject(new Error(`unpdf text extraction exceeded ${OCR_TIMEOUT_MS}ms timeout`)),
           OCR_TIMEOUT_MS,
         );
       });
       try {
+        // unpdf accepts a Uint8Array; Buffer is a Uint8Array subtype.
         const result = await Promise.race([
-          tesseract.recognize(buffer, 'eng').then((r) => ({
-            text: r.data.text ?? '',
-            confidence: typeof r.data.confidence === 'number' ? r.data.confidence : 0,
+          extractText(new Uint8Array(buffer), { mergePages: true }).then((r) => ({
+            text: r.text ?? '',
+            // unpdf yields high-fidelity text; we report a flat confidence
+            // of 90 when non-empty (0 when empty) so per-field regex confidence
+            // (which is already calibrated) wins out over the floor.
+            confidence: r.text && r.text.trim().length > 0 ? 90 : 0,
           })),
           timeout,
         ]);