The mobile receipt scanner now runs Tesseract.js in-browser by default — on-device, free, and image bytes never leave the device. AI providers (OpenAI / Claude) become a per-port opt-in for higher accuracy on hard-to-read receipts. - Lazy-load Tesseract WASM in src/lib/ocr/tesseract-client.ts (5 MB bundle dynamic-imports on first scan, not in main chunk) - Heuristic parser src/lib/ocr/parse-receipt-text.ts extracts vendor, date, amount, currency, and line items from raw OCR text - New port-scoped aiEnabled flag on OcrConfig (defaults false). Resolved flag never inherits from the global row — each port admin opts in independently - Scan endpoint short-circuits to manual-mode when aiEnabled=false so the AI provider is never invoked unless the admin has flipped the switch - Scan UI runs Tesseract first, then asks the server whether AI is enabled — uses the AI result only when its confidence beats Tesseract; network failures degrade gracefully to the local parse - Admin OCR-settings form gains the per-port aiEnabled checkbox Tests: 756/756 vitest (was 747) — +7 parser unit tests, +2 aiEnabled config tests. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
178 lines
5.6 KiB
TypeScript
178 lines
5.6 KiB
TypeScript
/**
 * OCR provider config — stored in `system_settings` under the key
 * `ocr.config`. Each port can either have its own row (port_id = port.id)
 * or opt into the global row (port_id = null) by setting `useGlobal: true`.
 */
|
|
|
|
import { and, eq, isNull } from 'drizzle-orm';
|
|
|
|
import { db } from '@/lib/db';
|
|
import { systemSettings } from '@/lib/db/schema/system';
|
|
import { encrypt, decrypt } from '@/lib/utils/encryption';
|
|
|
|
/** Identifiers of the AI vendors that can be selected for receipt OCR. */
export type OcrProvider = 'openai' | 'claude';

/** Model names offered for each provider, keyed by provider id. */
export const OCR_MODELS: Record<OcrProvider, string[]> = {
  openai: [
    'gpt-4o-mini',
    'gpt-4o',
    'gpt-4-turbo',
  ],
  claude: [
    'claude-haiku-4-5',
    'claude-sonnet-4-6',
    'claude-opus-4-7',
  ],
};

/** Model used for a provider when no explicit choice has been stored. */
export const DEFAULT_MODEL: Record<OcrProvider, string> = {
  openai: 'gpt-4o-mini',
  claude: 'claude-haiku-4-5',
};
|
|
|
|
/**
 * OCR settings as exposed to admin UIs. The API key itself is deliberately
 * absent from this shape; only its presence is reported.
 */
export interface OcrConfigPublic {
  /** Selected AI vendor. */
  provider: OcrProvider;
  /** Selected model name for `provider`. */
  model: string;
  /** Whether an encrypted key is stored. The key value is never echoed back. */
  hasApiKey: boolean;
  /** Set on port-level rows that defer to the global config. */
  useGlobal: boolean;
  /**
   * Per-port opt-in for AI receipt parsing. While this is false (the
   * default) the scanner relies on the in-browser Tesseract.js engine and
   * the AI provider is never called, even when a key is configured.
   */
  aiEnabled: boolean;
}
|
|
|
|
/**
 * Server-side view of the OCR settings: the public fields plus the
 * decrypted API key. Must not be sent to clients.
 */
export interface OcrConfigResolved extends OcrConfigPublic {
  /** Decrypted key, or null when none is stored. */
  apiKey: string | null;
  /**
   * Which row supplied the provider/model/key: the port's own row
   * ('port'), the global row ('global'), or no row at all ('none').
   */
  source: 'port' | 'global' | 'none';
}
|
|
|
|
/** JSON payload persisted in the settings row's `value` column. */
interface StoredOcrConfig {
  provider: OcrProvider;
  model: string;
  /** Output of `encrypt()` for the API key, or null when no key is stored. */
  apiKeyEncrypted: string | null;
  useGlobal: boolean;
  /** Optional on stored rows; readers treat anything other than `true` as disabled. */
  aiEnabled?: boolean;
}
|
|
|
|
/** Settings key under which the OCR config rows are stored. */
const KEY = 'ocr.config';
|
|
|
|
/**
 * Load the stored OCR config for one scope.
 *
 * @param portId Port whose row to read, or `null` for the global row.
 * @returns The row's `value` payload, or `null` when no row matches.
 */
async function readRow(portId: string | null): Promise<StoredOcrConfig | null> {
  // The global row is the one whose port_id is NULL; port rows match by id.
  const scopeFilter =
    portId === null ? isNull(systemSettings.portId) : eq(systemSettings.portId, portId);

  const matches = await db
    .select()
    .from(systemSettings)
    .where(and(eq(systemSettings.key, KEY), scopeFilter));

  const first = matches[0];
  // The JSON column is trusted to hold a StoredOcrConfig; it is not validated here.
  return first ? (first.value as unknown as StoredOcrConfig) : null;
}
|
|
|
|
/**
 * Replace the stored OCR config row for one scope.
 *
 * Implemented as delete + insert, which keeps the logic simple given the
 * (key, port_id) unique index. Both statements run inside one transaction
 * so a failed insert rolls the delete back instead of leaving the scope
 * with no row (and therefore no encrypted API key).
 *
 * @param portId Port whose row to replace, or `null` for the global row.
 * @param value  Payload to persist in the row's `value` column.
 * @param userId Recorded as `updatedBy` on the new row.
 */
async function writeRow(
  portId: string | null,
  value: StoredOcrConfig,
  userId: string,
): Promise<void> {
  const scope =
    portId === null
      ? and(eq(systemSettings.key, KEY), isNull(systemSettings.portId))
      : and(eq(systemSettings.key, KEY), eq(systemSettings.portId, portId));

  await db.transaction(async (tx) => {
    await tx.delete(systemSettings).where(scope);
    await tx.insert(systemSettings).values({
      key: KEY,
      portId,
      value: value as unknown as Record<string, unknown>,
      updatedBy: userId,
    });
  });
}
|
|
|
|
/**
 * Work out which OCR config applies to a port.
 *
 * The port's own row is used unless it sets `useGlobal` or does not exist,
 * in which case the global row is used. When neither yields a row, an empty
 * OpenAI default with no key is returned.
 *
 * @param portId Port to resolve the config for.
 * @returns The effective config, including the decrypted API key (if any)
 *          and which row it came from.
 */
export async function getResolvedOcrConfig(portId: string): Promise<OcrConfigResolved> {
  const portRow = await readRow(portId);
  const portDefersToGlobal = portRow?.useGlobal === true;
  // A missing port row is treated the same as an explicit opt-in to global.
  const readsGlobal = portDefersToGlobal || !portRow;
  const effectiveRow = readsGlobal ? await readRow(null) : portRow;

  if (effectiveRow) {
    const encryptedKey = effectiveRow.apiKeyEncrypted;
    return {
      provider: effectiveRow.provider,
      model: effectiveRow.model,
      apiKey: encryptedKey ? decrypt(encryptedKey) : null,
      hasApiKey: Boolean(encryptedKey),
      useGlobal: portDefersToGlobal,
      // aiEnabled is always taken from the port's own row: a port that
      // borrows the global key still has to switch AI on for itself.
      aiEnabled: portRow?.aiEnabled === true,
      source: readsGlobal ? 'global' : 'port',
    };
  }

  // No usable row anywhere: report defaults with AI off and no key.
  return {
    provider: 'openai',
    model: DEFAULT_MODEL.openai,
    apiKey: null,
    hasApiKey: false,
    useGlobal: portDefersToGlobal,
    aiEnabled: false,
    source: 'none',
  };
}
|
|
|
|
/**
 * Admin-UI view of the row stored for exactly one scope, with the API key
 * reduced to a presence flag. No port-to-global fallback is applied here.
 *
 * @param portId Port whose row to read, or `null` for the global row.
 * @returns The stored settings, or OpenAI defaults when no row exists.
 */
export async function getPublicOcrConfig(portId: string | null): Promise<OcrConfigPublic> {
  const stored = await readRow(portId);

  if (stored) {
    const { provider, model, apiKeyEncrypted, useGlobal, aiEnabled } = stored;
    return {
      provider,
      model,
      hasApiKey: Boolean(apiKeyEncrypted),
      useGlobal,
      // Rows without the flag count as disabled.
      aiEnabled: aiEnabled === true,
    };
  }

  return {
    provider: 'openai',
    model: DEFAULT_MODEL.openai,
    hasApiKey: false,
    useGlobal: false,
    aiEnabled: false,
  };
}
|
|
|
|
/** Fields accepted by `saveOcrConfig`. */
export interface SaveOcrConfigInput {
  provider: OcrProvider;
  model: string;
  /**
   * New API key. A non-empty value replaces the stored key; leaving it
   * undefined keeps whatever key is already stored.
   */
  apiKey?: string;
  /** Set to true to remove the stored key. */
  clearApiKey?: boolean;
  useGlobal?: boolean;
  /** Per-port switch that enables AI receipt parsing. Defaults to false. */
  aiEnabled?: boolean;
}
|
|
|
|
/**
 * Persist the OCR config for one scope, merging with the existing row
 * where the input leaves a field unspecified.
 *
 * @param portId Port to save for, or `null` for the global row.
 * @param input  New settings; see `SaveOcrConfigInput` for key handling.
 * @param userId Recorded as the author of the change.
 */
export async function saveOcrConfig(
  portId: string | null,
  input: SaveOcrConfigInput,
  userId: string,
): Promise<void> {
  const previous = await readRow(portId);
  const isGlobalScope = portId === null;

  // Key precedence: explicit clear, then a non-empty replacement, then
  // whatever was stored before.
  let apiKeyEncrypted: string | null;
  if (input.clearApiKey) {
    apiKeyEncrypted = null;
  } else if (input.apiKey !== undefined && input.apiKey.length > 0) {
    apiKeyEncrypted = encrypt(input.apiKey);
  } else {
    apiKeyEncrypted = previous?.apiKeyEncrypted ?? null;
  }

  const nextConfig: StoredOcrConfig = {
    provider: input.provider,
    model: input.model,
    apiKeyEncrypted,
    // The global row never defers to itself. Note that, unlike aiEnabled,
    // an omitted useGlobal is stored as false rather than preserved.
    useGlobal: isGlobalScope ? false : Boolean(input.useGlobal),
    // AI only makes sense per port. When the caller omits the flag the
    // stored value is kept, so changing provider/model does not switch
    // AI back off.
    aiEnabled: isGlobalScope ? false : (input.aiEnabled ?? previous?.aiEnabled ?? false),
  };

  await writeRow(portId, nextConfig, userId);
}
|