Fix GPT-5 nano empty response issue with token limits
Build and Push Docker Image / build (push) Successful in 9m36s
Details
Build and Push Docker Image / build (push) Successful in 9m36s
Details
GPT-5 nano (and other GPT-5 models) use reasoning that consumes the output token budget. When max_tokens is too low, all tokens are used by internal reasoning, leaving nothing for the response. - Add needsHigherTokenLimit() to detect models needing more tokens - Add getMinTokenLimit() to ensure a minimum of 16k tokens for GPT-5 - Update buildCompletionParams to apply minimum token limits - This fixes the "No response from AI" error with gpt-5-nano Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
bf187e4b9a
commit
e01d741f01
|
|
@ -70,6 +70,30 @@ export function supportsTemperature(model: string): boolean {
|
|||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a model requires higher token limits due to reasoning overhead.
|
||||
* GPT-5 nano especially needs more tokens as reasoning consumes output budget.
|
||||
*/
|
||||
export function needsHigherTokenLimit(model: string): boolean {
|
||||
const modelLower = model.toLowerCase()
|
||||
return modelLower.includes('nano') || modelLower.includes('gpt-5')
|
||||
}
|
||||
|
||||
/**
|
||||
* Get minimum recommended max_tokens for a model.
|
||||
* Reasoning models need higher limits because internal reasoning consumes tokens.
|
||||
*/
|
||||
export function getMinTokenLimit(model: string, requestedLimit?: number): number | undefined {
|
||||
// For GPT-5 nano, reasoning uses significant token budget
|
||||
// If user requests < 8000, bump it up or remove limit
|
||||
if (needsHigherTokenLimit(model)) {
|
||||
const minLimit = 16000 // Ensure enough headroom for reasoning
|
||||
if (!requestedLimit) return undefined // No limit = model default
|
||||
return Math.max(requestedLimit, minLimit)
|
||||
}
|
||||
return requestedLimit
|
||||
}
|
||||
|
||||
// ─── Chat Completion Parameter Builder ───────────────────────────────────────
|
||||
|
||||
type MessageRole = 'system' | 'user' | 'assistant' | 'developer'
|
||||
|
|
@ -124,11 +148,13 @@ export function buildCompletionParams(
|
|||
|
||||
// Token limit parameter differs between model types
|
||||
// Newer models (GPT-5+, o-series) use max_completion_tokens
|
||||
if (options.maxTokens) {
|
||||
// Also ensure sufficient tokens for models with reasoning overhead (GPT-5 nano)
|
||||
const effectiveMaxTokens = getMinTokenLimit(model, options.maxTokens)
|
||||
if (effectiveMaxTokens) {
|
||||
if (usesNewTokenParam(model)) {
|
||||
params.max_completion_tokens = options.maxTokens
|
||||
params.max_completion_tokens = effectiveMaxTokens
|
||||
} else {
|
||||
params.max_tokens = options.maxTokens
|
||||
params.max_tokens = effectiveMaxTokens
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue