Files
pn-new-crm/src/lib/services/expense-export.ts

184 lines
5.1 KiB
TypeScript
Raw Normal View History

import { eq, and, gte, lte, isNull, or, ilike } from 'drizzle-orm';
import { db } from '@/lib/db';
import { expenses } from '@/lib/db/schema/financial';
import { generatePdf } from '@/lib/pdf/generate';
import { getRate } from '@/lib/services/currency';
import { logger } from '@/lib/logger';
import type { ListExpensesInput } from '@/lib/validators/expenses';
async function fetchAllExpenses(portId: string, query: ListExpensesInput) {
feat(expenses): streaming expense-PDF export + receipt-less expense flag + audit-3 fixes Replaces the legacy text-only expense PDF (was just dumping rows into a single pdfme text field — no images, no pagination) with a proper streaming export modelled on the legacy Nuxt client-portal but re-architected for memory safety. The legacy implementation OOM'd on hundreds of receipts because it: - buffered every receipt image into memory simultaneously - accumulated PDF chunks into an array, concat'd at end - base64-encoded the whole PDF into a JSON response (3x peak memory) - had no image downscaling The new design: - `streamExpensePdf()` (src/lib/services/expense-pdf.service.ts): pdfkit pipes bytes directly to the HTTP response (no Buffer accumulation). Receipts are processed serially so peak heap is one image at a time. Sharp downscales any receipt > 500 KB or > 1500 px to JPEG q80 — typical 8 MB phone photo collapses to ~250 KB. For a 500-receipt export, peak RSS stays under ~100 MB; legacy needed >2 GB for the same input. - Pages: cover summary box (count, totals, currency equiv, optional processing fee), grouped expense table (groupBy=none|payer|category| date), one-page-per-receipt with header (establishment, amount, date, payer, category, file name) and full-bleed image. - Storage backend abstraction — receipts stream from `getStorageBackend().get(storageKey)`, works on MinIO/S3/filesystem. - Route: POST /api/v1/expenses/export/pdf streams binary application/pdf with cache-control:no-store. Validator caps expenseIds at 1000 to prevent runaway loops. Receipt-less expense flow (per user request): - Schema: 0033 migration adds `expenses.no_receipt_acknowledged` boolean (default false). - Validator: createExpenseSchema requires either receiptFileIds OR noReceiptAcknowledged=true; the .refine() error message tells the rep exactly what to do. updateExpenseSchema is partial and skips the rule (existing rows can be edited without re-acknowledging). - PDF: receiptless expenses get an inline red "(no receipt)" tag in the establishment cell + a red footer warning in the summary box showing the count and at-risk amount. - The legacy parent-company reimbursement queue may refuse to pay receiptless expenses, so the warning is load-bearing for ops. Audit-3 fixes piggy-backed: - 🔴 Tesseract OCR runtime now races a 30s timeout (CPU-bomb DoS protection — a crafted PDF rasterizing to high-res noise could pin the worker indefinitely). - 🟠 brochures.service.ts:listBrochures dropped a wasted query (the legacy single-brochure fast-path was discarding its result on the multi-brochure branch). - 🟠 berth-pdf.service.ts:listBerthPdfVersions now Promise.all's the presignDownload calls instead of awaiting each in a for-loop — 20-version berths went from 20× round-trip to 1×. - 🟡 public berths route no longer logs the full `row` object on enum drift (was dumping price + amenity columns into ops logs). - 🟡 dropped the dead `void sql` import from public berths route. Tests still 1163/1163. tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 04:38:32 +02:00
const conditions: ReturnType<typeof eq>[] = [
eq(expenses.portId, portId) as ReturnType<typeof eq>,
];
if (!query.includeArchived) {
conditions.push(isNull(expenses.archivedAt) as unknown as ReturnType<typeof eq>);
}
if (query.category) {
conditions.push(eq(expenses.category, query.category) as ReturnType<typeof eq>);
}
if (query.paymentStatus) {
conditions.push(eq(expenses.paymentStatus, query.paymentStatus) as ReturnType<typeof eq>);
}
if (query.currency) {
conditions.push(eq(expenses.currency, query.currency) as ReturnType<typeof eq>);
}
if (query.payer) {
conditions.push(eq(expenses.payer, query.payer) as ReturnType<typeof eq>);
}
if (query.dateFrom) {
feat(expenses): streaming expense-PDF export + receipt-less expense flag + audit-3 fixes Replaces the legacy text-only expense PDF (was just dumping rows into a single pdfme text field — no images, no pagination) with a proper streaming export modelled on the legacy Nuxt client-portal but re-architected for memory safety. The legacy implementation OOM'd on hundreds of receipts because it: - buffered every receipt image into memory simultaneously - accumulated PDF chunks into an array, concat'd at end - base64-encoded the whole PDF into a JSON response (3x peak memory) - had no image downscaling The new design: - `streamExpensePdf()` (src/lib/services/expense-pdf.service.ts): pdfkit pipes bytes directly to the HTTP response (no Buffer accumulation). Receipts are processed serially so peak heap is one image at a time. Sharp downscales any receipt > 500 KB or > 1500 px to JPEG q80 — typical 8 MB phone photo collapses to ~250 KB. For a 500-receipt export, peak RSS stays under ~100 MB; legacy needed >2 GB for the same input. - Pages: cover summary box (count, totals, currency equiv, optional processing fee), grouped expense table (groupBy=none|payer|category| date), one-page-per-receipt with header (establishment, amount, date, payer, category, file name) and full-bleed image. - Storage backend abstraction — receipts stream from `getStorageBackend().get(storageKey)`, works on MinIO/S3/filesystem. - Route: POST /api/v1/expenses/export/pdf streams binary application/pdf with cache-control:no-store. Validator caps expenseIds at 1000 to prevent runaway loops. Receipt-less expense flow (per user request): - Schema: 0033 migration adds `expenses.no_receipt_acknowledged` boolean (default false). - Validator: createExpenseSchema requires either receiptFileIds OR noReceiptAcknowledged=true; the .refine() error message tells the rep exactly what to do. updateExpenseSchema is partial and skips the rule (existing rows can be edited without re-acknowledging). - PDF: receiptless expenses get an inline red "(no receipt)" tag in the establishment cell + a red footer warning in the summary box showing the count and at-risk amount. - The legacy parent-company reimbursement queue may refuse to pay receiptless expenses, so the warning is load-bearing for ops. Audit-3 fixes piggy-backed: - 🔴 Tesseract OCR runtime now races a 30s timeout (CPU-bomb DoS protection — a crafted PDF rasterizing to high-res noise could pin the worker indefinitely). - 🟠 brochures.service.ts:listBrochures dropped a wasted query (the legacy single-brochure fast-path was discarding its result on the multi-brochure branch). - 🟠 berth-pdf.service.ts:listBerthPdfVersions now Promise.all's the presignDownload calls instead of awaiting each in a for-loop — 20-version berths went from 20× round-trip to 1×. - 🟡 public berths route no longer logs the full `row` object on enum drift (was dumping price + amenity columns into ops logs). - 🟡 dropped the dead `void sql` import from public berths route. Tests still 1163/1163. tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 04:38:32 +02:00
conditions.push(
gte(expenses.expenseDate, new Date(query.dateFrom)) as unknown as ReturnType<typeof eq>,
);
}
if (query.dateTo) {
feat(expenses): streaming expense-PDF export + receipt-less expense flag + audit-3 fixes Replaces the legacy text-only expense PDF (was just dumping rows into a single pdfme text field — no images, no pagination) with a proper streaming export modelled on the legacy Nuxt client-portal but re-architected for memory safety. The legacy implementation OOM'd on hundreds of receipts because it: - buffered every receipt image into memory simultaneously - accumulated PDF chunks into an array, concat'd at end - base64-encoded the whole PDF into a JSON response (3x peak memory) - had no image downscaling The new design: - `streamExpensePdf()` (src/lib/services/expense-pdf.service.ts): pdfkit pipes bytes directly to the HTTP response (no Buffer accumulation). Receipts are processed serially so peak heap is one image at a time. Sharp downscales any receipt > 500 KB or > 1500 px to JPEG q80 — typical 8 MB phone photo collapses to ~250 KB. For a 500-receipt export, peak RSS stays under ~100 MB; legacy needed >2 GB for the same input. - Pages: cover summary box (count, totals, currency equiv, optional processing fee), grouped expense table (groupBy=none|payer|category| date), one-page-per-receipt with header (establishment, amount, date, payer, category, file name) and full-bleed image. - Storage backend abstraction — receipts stream from `getStorageBackend().get(storageKey)`, works on MinIO/S3/filesystem. - Route: POST /api/v1/expenses/export/pdf streams binary application/pdf with cache-control:no-store. Validator caps expenseIds at 1000 to prevent runaway loops. Receipt-less expense flow (per user request): - Schema: 0033 migration adds `expenses.no_receipt_acknowledged` boolean (default false). - Validator: createExpenseSchema requires either receiptFileIds OR noReceiptAcknowledged=true; the .refine() error message tells the rep exactly what to do. updateExpenseSchema is partial and skips the rule (existing rows can be edited without re-acknowledging). - PDF: receiptless expenses get an inline red "(no receipt)" tag in the establishment cell + a red footer warning in the summary box showing the count and at-risk amount. - The legacy parent-company reimbursement queue may refuse to pay receiptless expenses, so the warning is load-bearing for ops. Audit-3 fixes piggy-backed: - 🔴 Tesseract OCR runtime now races a 30s timeout (CPU-bomb DoS protection — a crafted PDF rasterizing to high-res noise could pin the worker indefinitely). - 🟠 brochures.service.ts:listBrochures dropped a wasted query (the legacy single-brochure fast-path was discarding its result on the multi-brochure branch). - 🟠 berth-pdf.service.ts:listBerthPdfVersions now Promise.all's the presignDownload calls instead of awaiting each in a for-loop — 20-version berths went from 20× round-trip to 1×. - 🟡 public berths route no longer logs the full `row` object on enum drift (was dumping price + amenity columns into ops logs). - 🟡 dropped the dead `void sql` import from public berths route. Tests still 1163/1163. tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 04:38:32 +02:00
conditions.push(
lte(expenses.expenseDate, new Date(query.dateTo)) as unknown as ReturnType<typeof eq>,
);
}
if (query.search) {
conditions.push(
or(
ilike(expenses.establishmentName, `%${query.search}%`),
ilike(expenses.description, `%${query.search}%`),
) as unknown as ReturnType<typeof eq>,
);
}
return db
.select()
.from(expenses)
.where(and(...conditions));
}
export async function exportCsv(portId: string, query: ListExpensesInput): Promise<string> {
const rows = await fetchAllExpenses(portId, query);
const headers = [
'Date',
'Establishment',
'Category',
'Amount',
'Currency',
'Amount USD',
'Payment Status',
'Payment Method',
'Description',
];
const csvRows = rows.map((r) => {
const date = r.expenseDate ? new Date(r.expenseDate).toISOString().split('T')[0] : '';
return [
date,
r.establishmentName ?? '',
r.category ?? '',
r.amount,
r.currency,
r.amountUsd ?? 'N/A',
r.paymentStatus ?? '',
r.paymentMethod ?? '',
(r.description ?? '').replace(/"/g, '""'),
]
.map((v) => `"${v}"`)
.join(',');
});
return [headers.join(','), ...csvRows].join('\n');
}
feat(expenses): streaming expense-PDF export + receipt-less expense flag + audit-3 fixes Replaces the legacy text-only expense PDF (was just dumping rows into a single pdfme text field — no images, no pagination) with a proper streaming export modelled on the legacy Nuxt client-portal but re-architected for memory safety. The legacy implementation OOM'd on hundreds of receipts because it: - buffered every receipt image into memory simultaneously - accumulated PDF chunks into an array, concat'd at end - base64-encoded the whole PDF into a JSON response (3x peak memory) - had no image downscaling The new design: - `streamExpensePdf()` (src/lib/services/expense-pdf.service.ts): pdfkit pipes bytes directly to the HTTP response (no Buffer accumulation). Receipts are processed serially so peak heap is one image at a time. Sharp downscales any receipt > 500 KB or > 1500 px to JPEG q80 — typical 8 MB phone photo collapses to ~250 KB. For a 500-receipt export, peak RSS stays under ~100 MB; legacy needed >2 GB for the same input. - Pages: cover summary box (count, totals, currency equiv, optional processing fee), grouped expense table (groupBy=none|payer|category| date), one-page-per-receipt with header (establishment, amount, date, payer, category, file name) and full-bleed image. - Storage backend abstraction — receipts stream from `getStorageBackend().get(storageKey)`, works on MinIO/S3/filesystem. - Route: POST /api/v1/expenses/export/pdf streams binary application/pdf with cache-control:no-store. Validator caps expenseIds at 1000 to prevent runaway loops. Receipt-less expense flow (per user request): - Schema: 0033 migration adds `expenses.no_receipt_acknowledged` boolean (default false). - Validator: createExpenseSchema requires either receiptFileIds OR noReceiptAcknowledged=true; the .refine() error message tells the rep exactly what to do. updateExpenseSchema is partial and skips the rule (existing rows can be edited without re-acknowledging). - PDF: receiptless expenses get an inline red "(no receipt)" tag in the establishment cell + a red footer warning in the summary box showing the count and at-risk amount. - The legacy parent-company reimbursement queue may refuse to pay receiptless expenses, so the warning is load-bearing for ops. Audit-3 fixes piggy-backed: - 🔴 Tesseract OCR runtime now races a 30s timeout (CPU-bomb DoS protection — a crafted PDF rasterizing to high-res noise could pin the worker indefinitely). - 🟠 brochures.service.ts:listBrochures dropped a wasted query (the legacy single-brochure fast-path was discarding its result on the multi-brochure branch). - 🟠 berth-pdf.service.ts:listBerthPdfVersions now Promise.all's the presignDownload calls instead of awaiting each in a for-loop — 20-version berths went from 20× round-trip to 1×. - 🟡 public berths route no longer logs the full `row` object on enum drift (was dumping price + amenity columns into ops logs). - 🟡 dropped the dead `void sql` import from public berths route. Tests still 1163/1163. tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 04:38:32 +02:00
/**
* Legacy text-only PDF export superseded by the streaming
* `streamExpensePdf` in `src/lib/services/expense-pdf.service.ts`.
* The new service supports receipt-image embedding, sharp resize for
* stupidly-large attachments, and streaming output so hundreds of
* expenses no longer OOM the process.
*
* See `src/app/api/v1/expenses/export/pdf/route.ts` for the live route.
*/
export async function exportParentCompany(
portId: string,
query: ListExpensesInput,
): Promise<Uint8Array> {
// BR-043: Convert all amounts to EUR, add 5% management fee
const rows = await fetchAllExpenses(portId, query);
const eurRate = await getRate('USD', 'EUR');
if (!eurRate) {
logger.warn('EUR rate unavailable for parent company export, using 1:1 fallback');
}
const rate = eurRate ?? 1;
const convertedRows = rows.map((r) => {
const amountUsd = r.amountUsd ? Number(r.amountUsd) : Number(r.amount);
const amountEur = Number((amountUsd * rate).toFixed(2));
return {
date: r.expenseDate ? new Date(r.expenseDate).toISOString().split('T')[0] : '',
establishment: r.establishmentName ?? '-',
category: r.category ?? '-',
amountEur,
};
});
const subtotal = convertedRows.reduce((sum, r) => sum + r.amountEur, 0);
const fee = Number((subtotal * 0.05).toFixed(2));
const total = Number((subtotal + fee).toFixed(2));
const template = {
basePdf: { width: 210, height: 297, padding: [10, 10, 10, 10] },
schemas: [
[
{
name: 'title',
type: 'text',
position: { x: 10, y: 10 },
width: 190,
height: 10,
fontSize: 14,
fontColor: '#000000',
},
{
name: 'content',
type: 'text',
position: { x: 10, y: 25 },
width: 190,
height: 230,
fontSize: 8,
fontColor: '#000000',
},
{
name: 'summary',
type: 'text',
position: { x: 10, y: 260 },
width: 190,
height: 30,
fontSize: 10,
fontColor: '#000000',
},
],
],
};
const lines = convertedRows.map(
(r) => `${r.date} | ${r.establishment} | ${r.category} | EUR ${r.amountEur.toFixed(2)}`,
);
const summary = [
`Subtotal: EUR ${subtotal.toFixed(2)}`,
`Management Fee (5%): EUR ${fee.toFixed(2)}`,
`Total: EUR ${total.toFixed(2)}`,
].join('\n');
const inputs = [
{
title: 'Parent Company Expense Report (EUR)',
content: lines.join('\n'),
summary,
},
];
return generatePdf(template as unknown as Parameters<typeof generatePdf>[0], inputs);
}