diff --git a/tensorrt_llm/executor/base_worker.py b/tensorrt_llm/executor/base_worker.py index c7c6f804fbe..6810eb3cf87 100644 --- a/tensorrt_llm/executor/base_worker.py +++ b/tensorrt_llm/executor/base_worker.py @@ -432,7 +432,7 @@ def _deduce_max_tokens(request: GenerationRequest, # default_max_tokens is the biggest available value if max_tokens is None: return default_max_tokens - elif max_tokens > default_max_tokens: + elif max_tokens > default_max_tokens and default_max_tokens > 0: logger.warning( f"User-specified `max_tokens` ({max_tokens}) is greater than deduced " f"`default_max_tokens` ({default_max_tokens}), using default_max_tokens instead."