// src/app/api/v1/expenses/scan-receipt/route.ts

import { NextResponse } from 'next/server';

import { withAuth, withPermission, withRateLimit } from '@/lib/api/helpers';
import { errorResponse, ValidationError } from '@/lib/errors';
import { logger } from '@/lib/logger';
import { getResolvedOcrConfig } from '@/lib/services/ocr-config.service';
import {
  runOcr,
  type ParsedReceipt,
  OCR_FEATURE,
  OCR_ESTIMATED_TOKENS,
} from '@/lib/services/ocr-providers';
import { checkBudget, recordAiUsage } from '@/lib/services/ai-budget.service';

/**
 * Placeholder receipt with every field blank and a confidence of 0.
 *
 * Sent as `parsed` in every `source: 'manual'` response from this route
 * (AI disabled, no key configured, budget exceeded, provider failure) so the
 * response always carries a `ParsedReceipt`-shaped payload.
 */
const EMPTY: ParsedReceipt = {
  establishment: null,
  date: null,
  amount: null,
  currency: null,
  lineItems: [],
  confidence: 0,
};

/**
 * Receipt-scan handler: accepts a multipart upload with a `file` field and
 * tries to extract receipt data through the port's configured AI OCR provider.
 *
 * Wrapped by `withAuth`, `withPermission('expenses', 'create')` and
 * `withRateLimit('ocr')`.
 *
 * Flow:
 *  1. Validate the upload (present, an actual file, <= 10 MB, allowed image
 *     MIME type whose bytes match that type).
 *  2. Resolve the port's OCR config; if AI is disabled or no API key resolves,
 *     answer with an empty `manual` result and a `reason`.
 *  3. Check the port's AI budget; if it is not ok, answer with an empty
 *     `manual` result and `reason: 'budget-exceeded'`.
 *  4. Run the OCR provider, record token usage, and return the parsed receipt
 *     with `source: 'ai'`.
 *
 * Validation failures (and any other error thrown outside the provider call)
 * are converted with `errorResponse`. Provider failures never surface as an
 * error response: they degrade to an empty `manual` result with
 * `reason: 'provider-error'`.
 */
export const POST = withAuth(
  withPermission(
    'expenses',
    'create',
    withRateLimit('ocr', async (req, ctx) => {
      try {
        const formData = await req.formData();
        const file = formData.get('file');
        // `FormData.get` yields `string | File | null`. A plain text field
        // named "file" must be rejected as a validation error here; otherwise
        // it would reach `file.arrayBuffer()` and blow up with a TypeError.
        if (!file || typeof file === 'string') {
          throw new ValidationError('A file is required');
        }
        // Hard 10 MB cap — without this any authenticated rep could grief
        // their own port's AI budget by sending arbitrarily large images
        // and burning OCR tokens (auditor-E3 §28).
        const MAX_OCR_BYTES = 10 * 1024 * 1024;
        if (file.size > MAX_OCR_BYTES) {
          throw new ValidationError('Receipt image is too large (10 MB max).');
        }
        const buffer = Buffer.from(await file.arrayBuffer());
        // Uploads without a declared type are treated as JPEG; the magic-byte
        // check below still has to agree with that assumption.
        const mimeType = file.type || 'image/jpeg';
        // Magic-byte gate so a forged Content-Type doesn't reach the OCR
        // provider with arbitrary bytes.
        const { bufferMatchesMime } = await import('@/lib/constants/file-validation');
        const allowedOcrMimes = ['image/jpeg', 'image/png', 'image/webp'];
        if (!allowedOcrMimes.includes(mimeType) || !bufferMatchesMime(buffer, mimeType)) {
          throw new ValidationError('Unsupported receipt image type.');
        }

        const config = await getResolvedOcrConfig(ctx.portId);
        // Tesseract.js (in-browser) is the default. The server only invokes
        // an AI provider when (a) the port admin has flipped `aiEnabled` on
        // and (b) a key resolves. Otherwise the client falls back to its
        // local Tesseract result.
        if (!config.aiEnabled) {
          return NextResponse.json({
            data: { parsed: EMPTY, source: 'manual', reason: 'ai-disabled' },
          });
        }
        if (!config.apiKey) {
          return NextResponse.json({
            data: { parsed: EMPTY, source: 'manual', reason: 'no-ocr-configured' },
          });
        }

        // Per-port budget gate - refuse the call before we spend tokens
        // when the port has already hit its hard cap, or when the request
        // would push it past the cap. Soft-cap warnings ride along on the
        // success response so the UI can show a banner without blocking.
        const budget = await checkBudget({
          portId: ctx.portId,
          estimatedTokens: OCR_ESTIMATED_TOKENS,
        });
        if (!budget.ok) {
          return NextResponse.json({
            data: {
              parsed: EMPTY,
              source: 'manual',
              reason: 'budget-exceeded',
              providerError: `AI budget reached (${budget.usedTokens}/${budget.capTokens} tokens this period).`,
            },
          });
        }

        try {
          const result = await runOcr({
            provider: config.provider,
            model: config.model,
            apiKey: config.apiKey,
            imageBuffer: buffer,
            mimeType,
          });

          // Usage bookkeeping gets its own guard: the provider call has
          // already succeeded (and been paid for), so a failure to record it
          // must not be reported as a provider error, discard the parsed
          // receipt, or leak an internal error message to the client.
          try {
            await recordAiUsage({
              portId: ctx.portId,
              userId: ctx.userId,
              feature: OCR_FEATURE,
              provider: config.provider,
              model: config.model,
              inputTokens: result.usage.inputTokens,
              outputTokens: result.usage.outputTokens,
              requestId: result.usage.requestId,
            });
          } catch (usageErr) {
            logger.error(
              { err: usageErr, provider: config.provider, portId: ctx.portId },
              'Failed to record OCR AI usage',
            );
          }

          return NextResponse.json({
            data: {
              parsed: result.parsed,
              source: 'ai',
              provider: config.provider,
              model: config.model,
              softCapWarning: budget.softCap,
            },
          });
        } catch (err) {
          logger.error({ err, provider: config.provider }, 'OCR provider call failed');
          // Provider hiccup - degrade to manual entry rather than 500-ing.
          return NextResponse.json({
            data: {
              parsed: EMPTY,
              source: 'manual',
              reason: 'provider-error',
              // Truncated so an oversized provider message can't bloat the response.
              providerError: err instanceof Error ? err.message.slice(0, 200) : 'Unknown error',
            },
          });
        }
      } catch (error) {
        return errorResponse(error);
      }
    }),
  ),
);