From e01d741f01101652afac31aa2715406b7752e3b7 Mon Sep 17 00:00:00 2001
From: Matt
Date: Thu, 5 Feb 2026 15:02:22 +0100
Subject: [PATCH] Fix GPT-5 nano empty response issue with token limits

GPT-5 nano (and other GPT-5 models) use reasoning that consumes the
output token budget. When max_tokens is too low, all tokens get used by
internal reasoning, leaving nothing for the response.

- Add needsHigherTokenLimit() to detect models needing more tokens
- Add getMinTokenLimit() to ensure minimum 16k tokens for GPT-5
- Update buildCompletionParams to apply minimum token limits
- This fixes the "No response from AI" error with gpt-5-nano

Co-Authored-By: Claude Opus 4.5
---
 src/lib/openai.ts | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/lib/openai.ts b/src/lib/openai.ts
index 2114c95..bbddb95 100644
--- a/src/lib/openai.ts
+++ b/src/lib/openai.ts
@@ -70,6 +70,30 @@ export function supportsTemperature(model: string): boolean {
   )
 }
 
+/**
+ * Check if a model requires higher token limits due to reasoning overhead.
+ * GPT-5 nano especially needs more tokens as reasoning consumes output budget.
+ */
+export function needsHigherTokenLimit(model: string): boolean {
+  const modelLower = model.toLowerCase()
+  return modelLower.includes('nano') || modelLower.includes('gpt-5')
+}
+
+/**
+ * Get minimum recommended max_tokens for a model.
+ * Reasoning models need higher limits because internal reasoning consumes tokens.
+ */
+export function getMinTokenLimit(model: string, requestedLimit?: number): number | undefined {
+  // For GPT-5 nano, reasoning uses significant token budget
+  // If the requested limit is below the 16k minimum, raise it; with no limit, keep the model default
+  if (needsHigherTokenLimit(model)) {
+    const minLimit = 16000 // Ensure enough headroom for reasoning
+    if (!requestedLimit) return undefined // No limit = model default
+    return Math.max(requestedLimit, minLimit)
+  }
+  return requestedLimit
+}
+
 // ─── Chat Completion Parameter Builder ───────────────────────────────────────
 
 type MessageRole = 'system' | 'user' | 'assistant' | 'developer'
@@ -124,11 +148,13 @@ export function buildCompletionParams(
 
   // Token limit parameter differs between model types
   // Newer models (GPT-5+, o-series) use max_completion_tokens
-  if (options.maxTokens) {
+  // Also ensure sufficient tokens for models with reasoning overhead (GPT-5 nano)
+  const effectiveMaxTokens = getMinTokenLimit(model, options.maxTokens)
+  if (effectiveMaxTokens) {
     if (usesNewTokenParam(model)) {
-      params.max_completion_tokens = options.maxTokens
+      params.max_completion_tokens = effectiveMaxTokens
     } else {
-      params.max_tokens = options.maxTokens
+      params.max_tokens = effectiveMaxTokens
     }
   }