From e01d741f01101652afac31aa2715406b7752e3b7 Mon Sep 17 00:00:00 2001
From: Matt
Date: Thu, 5 Feb 2026 15:02:22 +0100
Subject: [PATCH] Fix GPT-5 nano empty response issue with token limits

GPT-5 nano (and other GPT-5 models) use reasoning that consumes the
output token budget. When max_tokens is too low, all tokens get used by
internal reasoning, leaving nothing for the response.

- Add needsHigherTokenLimit() to detect models needing more tokens
- Add getMinTokenLimit() to ensure minimum 16k tokens for GPT-5
- Update buildCompletionParams to apply minimum token limits
- This fixes the "No response from AI" error with gpt-5-nano

Co-Authored-By: Claude Opus 4.5
---
 src/lib/openai.ts | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/lib/openai.ts b/src/lib/openai.ts
index 2114c95..bbddb95 100644
--- a/src/lib/openai.ts
+++ b/src/lib/openai.ts
@@ -70,6 +70,30 @@ export function supportsTemperature(model: string): boolean {
   )
 }
 
+/**
+ * Check if a model requires higher token limits due to reasoning overhead.
+ * GPT-5 nano especially needs more tokens as reasoning consumes output budget.
+ */
+export function needsHigherTokenLimit(model: string): boolean {
+  const modelLower = model.toLowerCase()
+  return modelLower.includes('nano') || modelLower.includes('gpt-5')
+}
+
+/**
+ * Get minimum recommended max_tokens for a model.
+ * Reasoning models need higher limits because internal reasoning consumes tokens.
+ */
+export function getMinTokenLimit(model: string, requestedLimit?: number): number | undefined {
+  // For GPT-5 nano, reasoning uses significant token budget
+  // If the requested limit is below the 16k minimum, raise it; with no limit, keep the model default
+  if (needsHigherTokenLimit(model)) {
+    const minLimit = 16000 // Ensure enough headroom for reasoning
+    if (!requestedLimit) return undefined // No limit = model default
+    return Math.max(requestedLimit, minLimit)
+  }
+  return requestedLimit
+}
+
 // ─── Chat Completion Parameter Builder ───────────────────────────────────────
 
 type MessageRole = 'system' | 'user' | 'assistant' | 'developer'
@@ -124,11 +148,13 @@ export function buildCompletionParams(
 
   // Token limit parameter differs between model types
   // Newer models (GPT-5+, o-series) use max_completion_tokens
-  if (options.maxTokens) {
+  // Also ensure sufficient tokens for models with reasoning overhead (GPT-5 nano)
+  const effectiveMaxTokens = getMinTokenLimit(model, options.maxTokens)
+  if (effectiveMaxTokens) {
     if (usesNewTokenParam(model)) {
-      params.max_completion_tokens = options.maxTokens
+      params.max_completion_tokens = effectiveMaxTokens
     } else {
-      params.max_tokens = options.maxTokens
+      params.max_tokens = effectiveMaxTokens
     }
   }