// pn-new-crm/src/lib/services/ocr-providers.ts

/**
 * Receipt OCR provider adapters. Each adapter takes raw image bytes
 * and returns a normalized `ParsedReceipt` shape; callers don't care
 * which provider produced it.
 */

import OpenAI from 'openai';

import { CodedError } from '@/lib/errors';
import { logger } from '@/lib/logger';
import { fetchWithTimeout } from '@/lib/fetch-with-timeout';

/**
 * Request timeout, in milliseconds, handed to both provider calls in this
 * file (the OpenAI client's `timeout` option and `fetchWithTimeout`'s
 * `timeoutMs`).
 */
const OCR_TIMEOUT_MS = 30_000;

/** One itemized row extracted from a receipt. */
export interface ParsedReceiptLineItem {
  /** Text of the line as read from the receipt. */
  description: string;
  /** Amount for this line; presumably in the receipt's currency — confirm with callers. */
  amount: number;
}

/**
 * Provider-independent result of reading a receipt. Every scalar field is
 * `null` when it could not be extracted.
 */
export interface ParsedReceipt {
  /** Name of the business that issued the receipt. */
  establishment: string | null;
  /** ISO YYYY-MM-DD. */
  date: string | null;
  /** Receipt total (the prompt asks the model for the total). */
  amount: number | null;
  /** 3-letter ISO code, as requested from the model; the format is not enforced here. */
  currency: string | null;
  /** Itemized rows; empty when none were extracted. */
  lineItems: ParsedReceiptLineItem[];
  /** 0..1; below 0.6 surfaces "verify mode" UI. */
  confidence: number;
}

/** Token accounting and request identifier reported by the provider for one OCR call. */
export interface OcrUsage {
  /** Prompt/input tokens as reported by the provider; 0 when the provider omitted usage. */
  inputTokens: number;
  /** Completion/output tokens as reported by the provider; 0 when the provider omitted usage. */
  outputTokens: number;
  /** The provider's response `id`, or `null` when absent. */
  requestId: string | null;
}

/** What every adapter returns: the normalized receipt plus the provider's usage data. */
export interface OcrRunResult {
  /** Receipt fields normalized from the provider's reply. */
  parsed: ParsedReceipt;
  /** Token counts and request id for the call that produced `parsed`. */
  usage: OcrUsage;
}

/**
 * Fallback receipt used when a provider reply cannot be parsed: every field
 * is `null`, there are no line items, and confidence is 0.
 *
 * NOTE(review): this is a single module-level object (including its
 * `lineItems` array); treat it as read-only.
 */
const EMPTY_RESULT: ParsedReceipt = {
  establishment: null,
  date: null,
  amount: null,
  currency: null,
  lineItems: [],
  confidence: 0,
};

/**
 * Extraction instructions shared by both adapters: sent as the `system`
 * message to OpenAI and as the `system` field to Claude. The keys it asks
 * for mirror the fields of `ParsedReceipt`.
 */
const SYSTEM_PROMPT =
  'You extract structured data from a marina-business receipt image. Return ONLY a JSON object with these keys: establishment (string), date (ISO YYYY-MM-DD), amount (number, total), currency (3-letter ISO code), lineItems (array of {description, amount}), confidence (number 0-1). If a field cannot be read, return null for that field. Set confidence near 0 if the image is unreadable, near 1 if every field was confidently extracted.';

/** Inputs common to every provider adapter in this file. */
interface RunArgs {
  /** Raw bytes of the receipt image; base64-encoded before being sent. */
  imageBuffer: Buffer;
  /** MIME type of `imageBuffer` (e.g. `image/png`); forwarded to the provider as-is. */
  mimeType: string;
  /** Provider API key used to authenticate the request. */
  apiKey: string;
  /** Provider model identifier, forwarded unchanged. */
  model: string;
}

/**
 * Converts a provider's raw text reply into a normalized `ParsedReceipt`.
 *
 * Never throws: when no JSON object can be recovered from `content`, a
 * warning is logged and a fresh all-null receipt with confidence 0 is
 * returned.
 *
 * The reply is model output, so fields are validated rather than trusted:
 * - `establishment`, `date` and `currency` are kept only when they are strings;
 * - `amount` is kept only when it is a finite number;
 * - `lineItems` entries are kept only when they carry a string `description`
 *   and a finite numeric `amount`;
 * - `confidence` is clamped to 0..1 and defaults to 0.
 *
 * @param content Raw reply text; may be wrapped in Markdown code fences
 *   and/or surrounded by prose.
 * @returns The normalized receipt. Always a new object, so callers may
 *   mutate it freely.
 */
function safeParse(content: string): ParsedReceipt {
  const isFiniteNumber = (value: unknown): value is number =>
    typeof value === 'number' && Number.isFinite(value);
  const stringOrNull = (value: unknown): string | null =>
    typeof value === 'string' ? value : null;

  // Strip Markdown fences. The optional newline comes first so that a fence
  // preceded by a newline is removed whole instead of leaving a stray "json".
  const cleaned = content.replace(/\n?```(?:json)?\n?/gi, '').trim();

  // Try the whole reply first; if that fails, fall back to the outermost
  // `{ … }` span so replies such as "Here is the JSON: {...}" still parse.
  const candidates = [cleaned];
  const open = cleaned.indexOf('{');
  const close = cleaned.lastIndexOf('}');
  if (open !== -1 && close > open && close - open + 1 < cleaned.length) {
    candidates.push(cleaned.slice(open, close + 1));
  }

  let decoded: unknown;
  let parseError: unknown;
  let didParse = false;
  for (const candidate of candidates) {
    try {
      decoded = JSON.parse(candidate);
      didParse = true;
      break;
    } catch (err) {
      // Report the failure of the full reply, not of the fallback slice.
      if (parseError === undefined) parseError = err;
    }
  }

  if (!didParse || typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    logger.warn({ err: parseError, contentLen: cleaned.length }, 'OCR provider returned non-JSON');
    // Copy so callers can never mutate the shared EMPTY_RESULT or its array.
    return { ...EMPTY_RESULT, lineItems: [] };
  }

  const { establishment, date, amount, currency, lineItems, confidence } = decoded as Record<
    string,
    unknown
  >;

  const rawItems: unknown[] = Array.isArray(lineItems) ? lineItems : [];
  const validItems: ParsedReceiptLineItem[] = [];
  for (const item of rawItems) {
    if (typeof item !== 'object' || item === null) continue;
    const { description, amount: itemAmount } = item as Record<string, unknown>;
    if (typeof description === 'string' && isFiniteNumber(itemAmount)) {
      validItems.push({ description, amount: itemAmount });
    }
  }

  return {
    establishment: stringOrNull(establishment),
    date: stringOrNull(date),
    amount: isFiniteNumber(amount) ? amount : null,
    currency: stringOrNull(currency),
    lineItems: validItems,
    confidence: isFiniteNumber(confidence) ? Math.min(1, Math.max(0, confidence)) : 0,
  };
}

/**
 * OpenAI adapter: sends the image as a base64 data URL to the chat
 * completions endpoint in JSON-object mode and normalizes the first choice's
 * content via `safeParse`.
 *
 * Errors raised by the OpenAI client propagate to the caller unchanged.
 */
async function runOpenAi({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
  // An explicit timeout keeps a hung request from holding a Bull
  // documents-worker concurrency slot for minutes; the value matches the cap
  // used by the email-draft worker fetch.
  // NOTE(review): the SDK may retry timed-out requests by default, in which
  // case worst-case wall time exceeds a single timeout — confirm `maxRetries`.
  const openai = new OpenAI({ apiKey, timeout: OCR_TIMEOUT_MS });
  const imageDataUrl = `data:${mimeType};base64,${imageBuffer.toString('base64')}`;

  const completion = await openai.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: SYSTEM_PROMPT },
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Extract the receipt as JSON.' },
          { type: 'image_url', image_url: { url: imageDataUrl } },
        ],
      },
    ],
    max_tokens: 1024,
    response_format: { type: 'json_object' },
  });

  // A missing choice or empty content is treated as an empty JSON object.
  const replyText = completion.choices[0]?.message?.content ?? '{}';
  const parsed = safeParse(replyText);
  const usage: OcrUsage = {
    inputTokens: completion.usage?.prompt_tokens ?? 0,
    outputTokens: completion.usage?.completion_tokens ?? 0,
    requestId: completion.id ?? null,
  };
  return { parsed, usage };
}

/**
 * Anthropic adapter: posts the image to the Messages API over
 * `fetchWithTimeout` and normalizes the first `text` content block of the
 * reply via `safeParse`.
 *
 * @throws CodedError `OCR_UPSTREAM_ERROR` when the HTTP response is not ok;
 *   its `internalMessage` carries the status and at most the first 200
 *   characters of the response body.
 */
async function runClaude({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
  type ClaudeReply = {
    id?: string;
    content?: Array<{ type: string; text?: string }>;
    usage?: { input_tokens?: number; output_tokens?: number };
  };

  const encodedImage = imageBuffer.toString('base64');
  const requestPayload = JSON.stringify({
    model,
    max_tokens: 1024,
    system: SYSTEM_PROMPT,
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'image',
            source: { type: 'base64', media_type: mimeType, data: encodedImage },
          },
          { type: 'text', text: 'Extract the receipt as JSON.' },
        ],
      },
    ],
  });

  const response = await fetchWithTimeout('https://api.anthropic.com/v1/messages', {
    timeoutMs: OCR_TIMEOUT_MS,
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: requestPayload,
  });

  if (!response.ok) {
    // Reading the error body is best-effort; fall back to an empty detail.
    const upstreamBody = await response.text().catch(() => '');
    throw new CodedError('OCR_UPSTREAM_ERROR', {
      internalMessage: `Claude API ${response.status}: ${upstreamBody.slice(0, 200)}`,
    });
  }

  const reply = (await response.json()) as ClaudeReply;

  // Only the first text block is considered; no text block means "empty object".
  const textBlock = reply.content?.find((block) => block.type === 'text');
  const parsed = safeParse(textBlock?.text ?? '{}');
  const usage: OcrUsage = {
    inputTokens: reply.usage?.input_tokens ?? 0,
    outputTokens: reply.usage?.output_tokens ?? 0,
    requestId: reply.id ?? null,
  };
  return { parsed, usage };
}

/**
 * Runs receipt OCR against the selected provider and returns its normalized
 * result.
 *
 * `'openai'` is routed to the OpenAI adapter; any other value is routed to
 * the Claude adapter. Errors from the chosen adapter propagate to the caller.
 */
export async function runOcr(args: {
  provider: 'openai' | 'claude';
  imageBuffer: Buffer;
  mimeType: string;
  apiKey: string;
  model: string;
}): Promise<OcrRunResult> {
  const adapter = args.provider === 'openai' ? runOpenAi : runClaude;
  return adapter(args);
}

/**
 * Connectivity probe used by the admin "Test connection" button: runs a
 * real OCR call with a tiny dummy image.
 *
 * Resolves to `{ ok: true }` when the call completes without throwing. Any
 * error thrown by `runOcr` is caught and reported as `{ ok: false, reason }`,
 * where `reason` is the error's `message` (or `'Unknown error'` when the
 * thrown value is not an `Error`).
 *
 * Success only means the request completed; the content of the provider's
 * reply is not inspected.
 */
export async function testProvider(
  provider: 'openai' | 'claude',
  apiKey: string,
  model: string,
): Promise<{ ok: true } | { ok: false; reason: string }> {
  // 1×1 transparent PNG.
  const probeImage = Buffer.from(
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=',
    'base64',
  );

  try {
    await runOcr({
      provider,
      imageBuffer: probeImage,
      mimeType: 'image/png',
      apiKey,
      model,
    });
  } catch (failure) {
    if (failure instanceof Error) {
      return { ok: false, reason: failure.message };
    }
    return { ok: false, reason: 'Unknown error' };
  }

  return { ok: true };
}

/** Feature key for receipt OCR; presumably used by callers for usage tracking or gating — confirm against consumers. */
export const OCR_FEATURE = 'ocr_receipt';
/**
 * Estimated token cost of one OCR call; presumably used for budgeting before
 * the real usage is known — confirm against consumers.
 */
export const OCR_ESTIMATED_TOKENS = 2048;