11# Standard
22from argparse import Namespace
3+ from http import HTTPStatus
34import inspect
45import signal
56
67# Third Party
78from fastapi import Request
9+ from fastapi .exceptions import RequestValidationError
810from fastapi .responses import JSONResponse
911from starlette .datastructures import State
1012from vllm .config import ModelConfig
1416from vllm .entrypoints .logger import RequestLogger
1517from vllm .entrypoints .openai import api_server
1618from vllm .entrypoints .openai .cli_args import make_arg_parser , validate_parsed_serve_args
17- from vllm .entrypoints .openai .protocol import ErrorResponse
19+ from vllm .entrypoints .openai .protocol import ErrorInfo , ErrorResponse
1820from vllm .entrypoints .openai .serving_models import BaseModelPath , OpenAIServingModels
1921from vllm .entrypoints .openai .tool_parsers import ToolParserManager
2022from vllm .utils import FlexibleArgumentParser , is_valid_ipv6_address , set_ulimit
4143 # Third Party
4244 from vllm .reasoning import ReasoningParserManager
4345
46+
# Idle keep-alive timeout (in seconds) handed to the HTTP server for
# open connections with no in-flight requests.
TIMEOUT_KEEP_ALIVE = 5  # seconds

# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
@@ -162,6 +165,37 @@ def signal_handler(*_) -> None:
162165 # Use vllm build_app which adds middleware
163166 app = api_server .build_app (args )
164167
168+ # Override exception handler to flatten errors for detectors API
169+ @app .exception_handler (RequestValidationError )
170+ async def validation_exception_handler (
171+ request : Request , exc : RequestValidationError
172+ ):
173+ exc_str = str (exc )
174+ errors_str = str (exc .errors ())
175+ message = None
176+ if exc .errors () and errors_str and errors_str != exc_str :
177+ message = f"{ exc_str } { errors_str } "
178+ else :
179+ message = exc_str
180+
181+ error_info = ErrorInfo (
182+ message = message ,
183+ type = HTTPStatus .BAD_REQUEST .phrase ,
184+ code = HTTPStatus .BAD_REQUEST ,
185+ )
186+
187+ if request .url .path .startswith ("/api/v1/text" ):
188+ # Flatten detectors API request validation errors
189+ return JSONResponse (
190+ content = error_info .model_dump (), status_code = HTTPStatus .BAD_REQUEST
191+ )
192+ else :
193+ # vLLM general request validation error handling
194+ err = ErrorResponse (error = error_info )
195+ return JSONResponse (
196+ content = err .model_dump (), status_code = HTTPStatus .BAD_REQUEST
197+ )
198+
165199 # api_server.init_app_state takes vllm_config
166200 # ref. https://github.com/vllm-project/vllm/pull/16572
167201 if hasattr (engine_client , "get_vllm_config" ):
@@ -213,9 +247,9 @@ async def create_chat_detection(request: ChatDetectionRequest, raw_request: Requ
213247 detector_response = await chat_detection (raw_request ).chat (request , raw_request )
214248
215249 if isinstance (detector_response , ErrorResponse ):
216- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
217250 return JSONResponse (
218- content = detector_response .model_dump (), status_code = detector_response .code
251+ content = detector_response .error .model_dump (),
252+ status_code = detector_response .error .code ,
219253 )
220254
221255 elif isinstance (detector_response , DetectionResponse ):
@@ -235,9 +269,9 @@ async def create_context_doc_detection(
235269 )
236270
237271 if isinstance (detector_response , ErrorResponse ):
238- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
239272 return JSONResponse (
240- content = detector_response .model_dump (), status_code = detector_response .code
273+ content = detector_response .error .model_dump (),
274+ status_code = detector_response .error .code ,
241275 )
242276
243277 elif isinstance (detector_response , DetectionResponse ):
@@ -256,9 +290,9 @@ async def create_contents_detection(
256290 request , raw_request
257291 )
258292 if isinstance (detector_response , ErrorResponse ):
259- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
260293 return JSONResponse (
261- content = detector_response .model_dump (), status_code = detector_response .code
294+ content = detector_response .error .model_dump (),
295+ status_code = detector_response .error .code ,
262296 )
263297
264298 elif isinstance (detector_response , ContentsDetectionResponse ):
@@ -277,9 +311,9 @@ async def create_generation_detection(
277311 request , raw_request
278312 )
279313 if isinstance (detector_response , ErrorResponse ):
280- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
281314 return JSONResponse (
282- content = detector_response .model_dump (), status_code = detector_response .code
315+ content = detector_response .error .model_dump (),
316+ status_code = detector_response .error .code ,
283317 )
284318
285319 elif isinstance (detector_response , DetectionResponse ):
0 commit comments