Reduce AI costs: switch tagging to gpt-4o-mini, add custom base URL support

- Change AI tagging to use AI_MODELS.QUICK (gpt-4o-mini) instead of gpt-4o for a 10-15x cost reduction on classification tasks
- Add openai_base_url system setting for OpenAI-compatible providers (OpenRouter, Groq, Together AI, local models)
- Reset OpenAI client singleton when API key, base URL, or model changes
- Add base URL field to AI settings form with provider examples

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:34:59 +01:00
parent f12c29103c
commit 014bb15890
5 changed files with 74 additions and 4 deletions
--- a/src/components/settings/ai-settings-form.tsx
+++ b/src/components/settings/ai-settings-form.tsx
@@ -36,6 +36,7 @@ const formSchema = z.object({
  ai_model: z.string(),
  ai_send_descriptions: z.boolean(),
  openai_api_key: z.string().optional(),
  openai_base_url: z.string().optional(),
 })
 type FormValues = z.infer<typeof formSchema>
@@ -47,6 +48,7 @@ interface AISettingsFormProps {
    ai_model?: string
    ai_send_descriptions?: string
    openai_api_key?: string
    openai_base_url?: string
  }
 }
@@ -61,6 +63,7 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
      ai_model: settings.ai_model || 'gpt-4o',
      ai_send_descriptions: settings.ai_send_descriptions === 'true',
      openai_api_key: '',
      openai_base_url: settings.openai_base_url || '',
    },
  })
@@ -113,6 +116,9 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
      settingsToUpdate.push({ key: 'openai_api_key', value: data.openai_api_key })
    }
    // Save base URL (empty string clears it)
    settingsToUpdate.push({ key: 'openai_base_url', value: data.openai_base_url?.trim() || '' })
    updateSettings.mutate({ settings: settingsToUpdate })
  }
@@ -208,6 +214,27 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
          )}
        />
        <FormField
          control={form.control}
          name="openai_base_url"
          render={({ field }) => (
            <FormItem>
              <FormLabel>API Base URL (Optional)</FormLabel>
              <FormControl>
                <Input
                  placeholder="https://api.openai.com/v1"
                  {...field}
                />
              </FormControl>
              <FormDescription>
                Custom base URL for OpenAI-compatible providers. Leave blank for OpenAI.
                Use <code className="text-xs bg-muted px-1 rounded">https://openrouter.ai/api/v1</code> for OpenRouter (access Claude, Gemini, Llama, etc.)
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
        <FormField
          control={form.control}
          name="ai_model"
--- a/src/components/settings/settings-content.tsx
+++ b/src/components/settings/settings-content.tsx
@@ -84,6 +84,7 @@ export function SettingsContent({ initialSettings, isSuperAdmin = true }: Settin
    'ai_model',
    'ai_send_descriptions',
    'openai_api_key',
    'openai_base_url',
  ])
  const brandingSettings = getSettingsByKeys([
--- a/src/lib/openai.ts
+++ b/src/lib/openai.ts
@@ -187,7 +187,25 @@ async function getOpenAIApiKey(): Promise<string | null> {
 }
 /**
- * Create OpenAI client instance
+ * Get custom base URL for OpenAI-compatible providers.
 * Supports OpenRouter, Together AI, Groq, local models, etc.
 * Set via Settings → AI or OPENAI_BASE_URL env var.
 */
 async function getBaseURL(): Promise<string | undefined> {
  // Whitespace-only values are treated as unset so a stray space in the env
  // var or the stored setting never ends up as the client's base URL.
  const envBaseURL = process.env.OPENAI_BASE_URL?.trim() || undefined
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_base_url' },
    })
    // The stored setting takes precedence; an empty value falls through to the env var.
    return setting?.value?.trim() || envBaseURL
  } catch (error: unknown) {
    // Best-effort lookup: a failed settings read must not block AI calls, but
    // log it so a silently ignored custom base URL can be diagnosed.
    console.warn(
      '[OpenAI] Failed to read openai_base_url setting; falling back to OPENAI_BASE_URL env var',
      error
    )
    return envBaseURL
  }
 }
 /**
 * Create OpenAI client instance.
 * Supports custom baseURL for OpenAI-compatible providers
 * (OpenRouter, Groq, Together AI, local models, etc.)
 */
 async function createOpenAIClient(): Promise<OpenAI | null> {
  const apiKey = await getOpenAIApiKey()
@@ -197,8 +215,15 @@ async function createOpenAIClient(): Promise<OpenAI | null> {
    return null
  }
  const baseURL = await getBaseURL()
  if (baseURL) {
    console.log(`[OpenAI] Using custom base URL: ${baseURL}`)
  }
  return new OpenAI({
    apiKey,
    ...(baseURL ? { baseURL } : {}),
  })
 }
@@ -221,6 +246,15 @@ export async function getOpenAI(): Promise<OpenAI | null> {
  return client
 }
 /**
 * Discard the cached OpenAI client held on globalForOpenAI.
 * Clears the initialized flag and the stored instance; intended to be called
 * after AI-related settings change so the next getOpenAI() call does not
 * reuse a client built from stale configuration.
 */
 export function resetOpenAIClient(): void {
  globalForOpenAI.openaiInitialized = false
  globalForOpenAI.openai = undefined
 }
 /**
 * Check if OpenAI is configured and available
 */
--- a/src/server/routers/settings.ts
+++ b/src/server/routers/settings.ts
@@ -201,6 +201,12 @@ export const settingsRouter = router({
        clearStorageProviderCache()
      }
      // Reset OpenAI client if API key, base URL, or model changed
      if (input.settings.some((s) => s.key === 'openai_api_key' || s.key === 'openai_base_url' || s.key === 'ai_model')) {
        const { resetOpenAIClient } = await import('@/lib/openai')
        resetOpenAIClient()
      }
      // Audit log
      await logAudit({
        prisma: ctx.prisma,
--- a/src/server/services/ai-tagging.ts
+++ b/src/server/services/ai-tagging.ts
@@ -16,7 +16,7 @@
 */
 import { prisma } from '@/lib/prisma'
-import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
+import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
 import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
 import { classifyAIError, createParseError, logAIError } from './ai-errors'
 import {
@@ -178,7 +178,8 @@ async function getAISuggestions(
    return { suggestions: [], tokensUsed: 0 }
  }
-  const model = await getConfiguredModel()
+  // Use QUICK model — tag classification is simple, doesn't need expensive reasoning
  const model = await getConfiguredModel(AI_MODELS.QUICK)
  // Build compact tag list for prompt
  const tagList = availableTags.map((t) => ({
@@ -294,7 +295,8 @@ async function getAISuggestionsBatch(
    return { suggestionsMap: new Map(), tokensUsed: 0 }
  }
-  const model = await getConfiguredModel()
+  // Use QUICK model — tag classification is simple, doesn't need expensive reasoning
  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const suggestionsMap = new Map<string, TagSuggestion[]>()
  // Build compact tag list (sent once for entire batch)