Files
pn-new-crm/src/lib/services/ocr-providers.ts
Matt 221ae5784e chore(autonomous-session): consolidate uncommitted work from prior session
Bundles the prior autonomous-session output that was sitting unstaged:

- Em-dash sweep across src/ + tests/ (en-dash/em-dash to hyphen, ~2280 instances)
- country-flag-icons rollout (CountryFlag component, replaces emoji glyphs that
  never rendered on Windows; lazy-loads the 3x2 SVG index as a single chunk
  after the per-subpath dynamic-import approach silently failed in webpack)
- Admin IA Phase 1+2: 7-domain regroup, 41 to 38 pages, /admin/berths index,
  redirects (ocr to ai, reports to dashboard, invitations to users),
  docs/admin-ia-proposal.md
- Per-template email tester (registry + endpoint + UI on Email admin page)
- Cancel-document mode picker (delete-from-Documenso vs keep-for-audit)
- Dashboard PDF report: 25 widgets, SVG charts, date-range picker, 11 resolvers
- Customize-widgets per-region sortables at xl+ (charts/rails/feed); single
  flat sortable below xl when the layout stacks; per-viewport saved orders
- Audit doc updates capturing each shipped item
- Lint fixes: react-compiler immutability in DonutChart (reduce instead of
  let-reassign), set-state-in-effect disables in CountryFlag and
  UploadForSigning preview-bytes effect, unused 'confirm' destructures in
  interest contract + reservation tabs, unescaped apostrophe in test-template
  card copy
2026-05-23 00:52:59 +02:00

207 lines
6.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Receipt OCR provider adapters. Each adapter takes raw image bytes
* and returns a normalized `ParsedReceipt` shape; callers don't care
* which provider produced it.
*/
import OpenAI from 'openai';
import { CodedError } from '@/lib/errors';
import { logger } from '@/lib/logger';
import { fetchWithTimeout } from '@/lib/fetch-with-timeout';
/** Timeout, in milliseconds (30 seconds), passed to each OCR provider request. */
const OCR_TIMEOUT_MS = 30_000;
/** One itemised entry on a receipt, as reported by the OCR provider. */
export interface ParsedReceiptLineItem {
/** Item text as read from the receipt. */
description: string;
/** Amount for this item; no per-line currency is carried - presumably it shares the receipt's `currency` (verify). */
amount: number;
}
/**
* Provider-independent receipt shape returned by every OCR adapter.
* Scalar fields the provider could not read are `null`.
*/
export interface ParsedReceipt {
/** Establishment named on the receipt. */
establishment: string | null;
/** ISO YYYY-MM-DD. */
date: string | null;
/** Receipt total. */
amount: number | null;
/** 3-letter ISO currency code, as requested from the provider. */
currency: string | null;
/** Itemised entries; empty when none were extracted. */
lineItems: ParsedReceiptLineItem[];
/** 0..1; below 0.6 surfaces "verify mode" UI. */
confidence: number;
}
/** Token accounting and provider response id for a single OCR call. */
export interface OcrUsage {
/** Input (prompt) tokens reported by the provider; 0 when the provider did not report usage. */
inputTokens: number;
/** Output (completion) tokens reported by the provider; 0 when the provider did not report usage. */
outputTokens: number;
/** Id taken from the provider response, or `null` when the response carried none. */
requestId: string | null;
}
/** Result of one OCR run: the normalized receipt plus usage metadata. */
export interface OcrRunResult {
/** Receipt data normalized from the provider's reply. */
parsed: ParsedReceipt;
/** Token counts and response id for the call that produced `parsed`. */
usage: OcrUsage;
}
/**
* Fallback receipt used when a provider reply cannot be parsed:
* every scalar field is null, there are no line items, and confidence is 0.
* NOTE(review): this is a shared module-level object - treat it as read-only.
*/
const EMPTY_RESULT: ParsedReceipt = {
establishment: null,
date: null,
amount: null,
currency: null,
lineItems: [],
confidence: 0,
};
/**
* System prompt sent to both providers. It asks for a bare JSON object whose
* keys mirror the fields of `ParsedReceipt`, with null for unreadable fields.
*/
const SYSTEM_PROMPT =
'You extract structured data from a marina-business receipt image. Return ONLY a JSON object with these keys: establishment (string), date (ISO YYYY-MM-DD), amount (number, total), currency (3-letter ISO code), lineItems (array of {description, amount}), confidence (number 0-1). If a field cannot be read, return null for that field. Set confidence near 0 if the image is unreadable, near 1 if every field was confidently extracted.';
/** Arguments shared by the provider-specific runners. */
interface RunArgs {
/** Raw image bytes; the runners base64-encode them before sending. */
imageBuffer: Buffer;
/** MIME type of the image (for example `image/png`), forwarded to the provider. */
mimeType: string;
/** API key for the selected provider. */
apiKey: string;
/** Provider model identifier, forwarded unchanged. */
model: string;
}
/**
 * Converts a provider's text reply into a `ParsedReceipt` without ever throwing.
 *
 * Markdown code fences are stripped first, then the remainder is parsed as JSON.
 * Because the reply is untrusted model output, every field is validated rather
 * than trusted via a cast:
 * - `establishment`, `date`, `currency`: kept only when they are strings, else `null`.
 * - `amount`: kept only when it is a finite number, else `null`.
 * - `lineItems`: only entries with a string `description` and a finite numeric
 *   `amount` are kept; anything else is dropped.
 * - `confidence`: clamped into the documented 0..1 range; 0 when missing or invalid.
 *
 * @param content Raw text returned by the provider.
 * @returns A freshly allocated receipt. When the reply is not a JSON object, a
 *   warning is logged and an empty receipt (all null, confidence 0) is returned.
 */
function safeParse(content: string): ParsedReceipt {
  const cleaned = content.replace(/```json\n?|\n?```/g, '').trim();

  // Always hand out a fresh object (and a fresh array) so a caller that mutates
  // the result cannot corrupt the shared EMPTY_RESULT for later calls.
  const emptyReceipt = (): ParsedReceipt => ({ ...EMPTY_RESULT, lineItems: [] });

  let raw: unknown;
  try {
    raw = JSON.parse(cleaned);
  } catch (err) {
    logger.warn({ err, contentLen: cleaned.length }, 'OCR provider returned non-JSON');
    return emptyReceipt();
  }

  // Valid JSON that is not an object (null, array, number, string) carries no receipt fields.
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
    logger.warn({ contentLen: cleaned.length }, 'OCR provider returned non-object JSON');
    return emptyReceipt();
  }
  const obj = raw as Record<string, unknown>;

  const asString = (value: unknown): string | null => (typeof value === 'string' ? value : null);
  // Number.isFinite also rejects Infinity, which JSON.parse yields for literals like 1e999.
  const asFiniteNumber = (value: unknown): number | null =>
    typeof value === 'number' && Number.isFinite(value) ? value : null;

  const lineItems: ParsedReceiptLineItem[] = [];
  if (Array.isArray(obj.lineItems)) {
    for (const entry of obj.lineItems as unknown[]) {
      if (typeof entry !== 'object' || entry === null) continue;
      const { description, amount } = entry as Record<string, unknown>;
      const itemAmount = asFiniteNumber(amount);
      if (typeof description === 'string' && itemAmount !== null) {
        lineItems.push({ description, amount: itemAmount });
      }
    }
  }

  const confidence = Math.min(1, Math.max(0, asFiniteNumber(obj.confidence) ?? 0));

  return {
    establishment: asString(obj.establishment),
    date: asString(obj.date),
    amount: asFiniteNumber(obj.amount),
    currency: asString(obj.currency),
    lineItems,
    confidence,
  };
}
/**
 * OpenAI adapter: sends the image as a base64 data URL to the chat-completions
 * endpoint in JSON-object response mode and normalizes the reply.
 *
 * @returns The parsed receipt plus token usage; token counts fall back to 0 and
 *   the request id to `null` when the response omits them.
 * @throws Whatever the OpenAI client throws (this function adds no error wrapping).
 */
async function runOpenAi({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
  // An explicit timeout keeps a hung request from holding a Bull
  // documents-worker concurrency slot for many minutes. The 30s value matches
  // the cap on the (newer) email-draft worker fetch.
  // NOTE(review): the OpenAI SDK may retry timed-out requests by default, so the
  // total wall time can exceed a single timeout window - confirm, and set
  // `maxRetries` if a hard cap is required.
  const openai = new OpenAI({ apiKey, timeout: OCR_TIMEOUT_MS });
  const imageDataUrl = `data:${mimeType};base64,${imageBuffer.toString('base64')}`;

  const completion = await openai.chat.completions.create({
    model,
    max_tokens: 1024,
    response_format: { type: 'json_object' },
    messages: [
      { role: 'system', content: SYSTEM_PROMPT },
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Extract the receipt as JSON.' },
          { type: 'image_url', image_url: { url: imageDataUrl } },
        ],
      },
    ],
  });

  // A missing choice or empty content is treated as an empty JSON object.
  const replyText = completion.choices[0]?.message?.content ?? '{}';
  const tokenUsage = completion.usage;

  return {
    parsed: safeParse(replyText),
    usage: {
      inputTokens: tokenUsage?.prompt_tokens ?? 0,
      outputTokens: tokenUsage?.completion_tokens ?? 0,
      requestId: completion.id ?? null,
    },
  };
}
/**
 * Claude adapter: posts the image as a base64 content block to the Anthropic
 * Messages endpoint (subject to `OCR_TIMEOUT_MS`) and normalizes the reply.
 *
 * @returns The parsed receipt plus token usage; token counts fall back to 0 and
 *   the request id to `null` when the response omits them.
 * @throws CodedError `OCR_UPSTREAM_ERROR` when the HTTP response is not ok; the
 *   internal message carries the status and the first 200 characters of the body.
 */
async function runClaude({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
  type ClaudeReply = {
    id?: string;
    content?: Array<{ type: string; text?: string }>;
    usage?: { input_tokens?: number; output_tokens?: number };
  };

  const payload = {
    model,
    max_tokens: 1024,
    system: SYSTEM_PROMPT,
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'image',
            source: { type: 'base64', media_type: mimeType, data: imageBuffer.toString('base64') },
          },
          { type: 'text', text: 'Extract the receipt as JSON.' },
        ],
      },
    ],
  };

  const httpRes = await fetchWithTimeout('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    timeoutMs: OCR_TIMEOUT_MS,
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify(payload),
  });

  if (!httpRes.ok) {
    // Reading the error body is best-effort; a failed read yields an empty detail.
    const errorBody = await httpRes.text().catch(() => '');
    throw new CodedError('OCR_UPSTREAM_ERROR', {
      internalMessage: `Claude API ${httpRes.status}: ${errorBody.slice(0, 200)}`,
    });
  }

  const reply = (await httpRes.json()) as ClaudeReply;
  // Use the first text block; a reply with no text block is treated as an empty JSON object.
  const firstTextBlock = reply.content?.find((block) => block.type === 'text');
  const replyText = firstTextBlock?.text ?? '{}';

  return {
    parsed: safeParse(replyText),
    usage: {
      inputTokens: reply.usage?.input_tokens ?? 0,
      outputTokens: reply.usage?.output_tokens ?? 0,
      requestId: reply.id ?? null,
    },
  };
}
/**
 * Runs receipt OCR with the selected provider.
 *
 * `'openai'` is routed to the OpenAI adapter; any other value is routed to the
 * Claude adapter. Errors from the chosen adapter propagate to the caller.
 */
export async function runOcr(args: {
  provider: 'openai' | 'claude';
  imageBuffer: Buffer;
  mimeType: string;
  apiKey: string;
  model: string;
}): Promise<OcrRunResult> {
  const adapter = args.provider === 'openai' ? runOpenAi : runClaude;
  return adapter(args);
}
/**
 * Tiny dummy-image probe used by the admin "Test connection" button.
 *
 * Sends a one-pixel PNG through `runOcr` with the given credentials.
 *
 * @returns `{ ok: true }` when the call completes without throwing; otherwise
 *   `{ ok: false, reason }`, where `reason` is the thrown error's message
 *   (or `'Unknown error'` when the thrown value is not an `Error`).
 *   The parsed receipt itself is ignored, so success only means the request
 *   did not throw.
 */
export async function testProvider(
  provider: 'openai' | 'claude',
  apiKey: string,
  model: string,
): Promise<{ ok: true } | { ok: false; reason: string }> {
  // 1x1 transparent PNG.
  const probeImage = Buffer.from(
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=',
    'base64',
  );

  try {
    await runOcr({ provider, apiKey, model, mimeType: 'image/png', imageBuffer: probeImage });
  } catch (err) {
    return { ok: false, reason: err instanceof Error ? err.message : 'Unknown error' };
  }
  return { ok: true };
}
/** Identifier for the receipt-OCR feature - presumably the key callers use for feature/usage tracking; verify against callers. */
export const OCR_FEATURE = 'ocr_receipt';
/** Estimated token count for one OCR call - presumably used for budgeting before real usage is known; verify against callers. */
export const OCR_ESTIMATED_TOKENS = 2048;