Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/guides/multimodal.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ curl http://localhost:8000/v1/chat/completions \
| Local file | `{"type": "video", "video": "/path/to/video.mp4"}` |
| Base64 | `{"type": "video_url", "video_url": {"url": "data:video/mp4;base64,..."}}` |

### Remote URL safety

Remote image, video, and audio URLs are checked before each fetch and redirect
hop. URLs that resolve to localhost, link-local, private, or otherwise
non-global addresses are rejected with a generic client error while detailed
diagnostics stay in server logs.

This validation does not pin the resolved IP address for the subsequent HTTP
transport connection, so a hostname may re-resolve to a different (unsafe)
address between the safety check and the actual fetch. In environments where
DNS rebinding or split-horizon DNS is in scope, run vllm-mlx behind network
egress controls or fetch media through a trusted proxy that enforces the
destination policy at connect time.

## Python API

```python
Expand Down
11 changes: 11 additions & 0 deletions tests/test_mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ def test_validate_url_safety_allows_public_ip(self):

_validate_url_safety("https://8.8.8.8/image.jpg")

def test_unsafe_remote_url_error_has_safe_public_message(self):
    """Public safety errors should not disclose resolved hosts or IPs."""
    from vllm_mlx.models.mllm import UnsafeRemoteURLError, _validate_url_safety

    unsafe_url = "http://169.254.169.254/latest/meta-data/"
    with pytest.raises(UnsafeRemoteURLError) as exc_info:
        _validate_url_safety(unsafe_url)

    error = exc_info.value
    # The internal message keeps full diagnostic detail for server logs...
    assert "169.254.169.254" in str(error)
    # ...while the client-facing message stays generic and leaks no address.
    assert error.public_message == "Remote media URL is not allowed"
    assert "169.254.169.254" not in error.public_message

def test_request_with_safe_redirects_blocks_unsafe_redirect(self, monkeypatch):
"""Test that redirect hops are validated before a second request."""
from vllm_mlx.models import mllm
Expand Down
58 changes: 58 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2561,6 +2561,64 @@ def fake_extract(messages, preserve_native_format=False):
assert response.status_code == 200
assert extract_calls["count"] == 1

def test_chat_completion_sanitizes_remote_media_safety_errors(
    self, client, monkeypatch
):
    """Unsafe remote media URLs should return a generic public error."""
    import vllm_mlx.server as server

    class StubEngine:
        # The engine must never be reached: the unsafe URL has to be
        # rejected during request preparation, before generation starts.
        model_name = "fake-mllm"
        is_mllm = True
        preserve_native_tool_format = False

        async def chat(self, messages, **kwargs):  # pragma: no cover
            raise AssertionError("unsafe URL should fail during preparation")

    async def stub_acquire(_raw_request, **_kwargs):
        return StubEngine()

    async def stub_release(*_args, **_kwargs):
        return None

    monkeypatch.setattr(server, "_acquire_default_engine_for_request", stub_acquire)
    monkeypatch.setattr(server, "_release_engine_for_request", stub_release)
    monkeypatch.setattr(server, "_model_name", "test-model")
    monkeypatch.setattr(server, "_default_timeout", 30.0)
    monkeypatch.setattr(server, "_default_max_tokens", 128)
    monkeypatch.setattr(server, "_api_key", None)
    monkeypatch.setattr(
        server,
        "_rate_limiter",
        server.RateLimiter(requests_per_minute=60, enabled=False),
    )

    # Classic SSRF target: the cloud instance metadata endpoint.
    metadata_url = "http://169.254.169.254/latest/meta-data/"
    payload = {
        "model": "test-model",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "describe this"},
                    {
                        "type": "image_url",
                        "image_url": {"url": metadata_url},
                    },
                ],
            }
        ],
        "max_tokens": 8,
    }

    response = client.post("/v1/chat/completions", json=payload)

    # Clients get only the generic detail string; the blocked IP must not
    # appear anywhere in the response body.
    assert response.status_code == 400
    assert response.json()["detail"] == "Remote media URL is not allowed"
    assert "169.254.169.254" not in response.text


class TestChatCompletionStreamingModeSwitching:
"""Endpoint-level regression tests for stream/non-stream mode switching."""
Expand Down
9 changes: 8 additions & 1 deletion vllm_mlx/models/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,14 @@ class FileSizeExceededError(Exception):
class UnsafeRemoteURLError(ValueError):
    """Raised when a remote media URL targets an unsafe destination.

    Carries two messages: the exception's ``str`` holds the detailed
    internal diagnostic (intended for server logs only), while
    ``public_message`` is a generic, non-identifying text safe to return
    to clients.
    """

    def __init__(
        self,
        message: str,
        *,
        public_message: str = "Remote media URL is not allowed",
    ) -> None:
        super().__init__(message)
        # Generic client-facing text; must never include host/IP details.
        self.public_message = public_message


@dataclass
Expand Down
85 changes: 73 additions & 12 deletions vllm_mlx/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@
load_registry_config,
)
from .metrics import metrics as _metrics
from .models.mllm import UnsafeRemoteURLError, _validate_url_safety, is_url
from .reasoning import get_parser as get_reasoning_parser
from .tool_parsers import ToolParserManager, get_parser_stop_tokens

Expand Down Expand Up @@ -264,6 +265,8 @@ def _prepare_chat_messages(
request_messages: list[Message | dict],
) -> tuple[list[dict], list, list, list, bool]:
"""Normalize messages and collect media once for both stream/non-stream paths."""
_validate_remote_media_urls(request_messages)

is_mllm = bool(getattr(engine, "is_mllm", False))
preserve_native = bool(getattr(engine, "preserve_native_tool_format", False))

Expand Down Expand Up @@ -332,6 +335,52 @@ def _prepare_chat_messages(
return messages, images, videos, audios, has_media


def _iter_remote_media_urls(messages: list[Message | dict]):
    """Yield remote media URLs from OpenAI-style multimodal message content."""
    for message in messages:
        if isinstance(message, dict):
            content = message.get("content")
        else:
            content = message.content
        # Only list-style (multimodal) content can carry media parts.
        if not isinstance(content, list):
            continue

        for part in content:
            # Normalize pydantic v2 / v1 model objects into plain dicts.
            if hasattr(part, "model_dump"):
                part = part.model_dump(exclude_none=True)
            elif hasattr(part, "dict"):
                part = {
                    key: value
                    for key, value in part.dict().items()
                    if value is not None
                }
            if not isinstance(part, dict):
                continue

            part_type = part.get("type", "")
            candidate = None
            if part_type in ("image_url", "video_url", "audio_url"):
                # OpenAI-style wrapper: {"type": "image_url", "image_url": {...}}
                candidate = part.get(part_type, {})
            elif part_type in {"image", "video", "audio"}:
                # Bare form: {"type": "image", "image": "..."} (or "url" key).
                candidate = part.get(part_type, part.get("url", ""))

            if isinstance(candidate, dict):
                candidate = candidate.get("url", "")
            if isinstance(candidate, str) and is_url(candidate):
                yield candidate


def _validate_remote_media_urls(messages: list[Message | dict]) -> None:
    """Validate remote media URLs during request preparation.

    Runs every remote media URL found in *messages* through
    ``_validate_url_safety``, which raises ``UnsafeRemoteURLError`` for
    disallowed destinations.
    """
    for media_url in _iter_remote_media_urls(messages):
        _validate_url_safety(media_url)


def _raise_remote_media_http_error(exc: UnsafeRemoteURLError) -> None:
    """Log internal URL-safety detail while returning a generic client error."""
    # Detailed diagnostics (resolved host/IP) stay server-side in the log;
    # clients only ever see the exception's generic public_message.
    sanitized_detail = _sanitize_log_text(exc, limit=500)
    logger.warning("Blocked unsafe remote media URL: %s", sanitized_detail)
    raise HTTPException(status_code=400, detail=exc.public_message) from exc


def _prepare_json_logits_processor(
engine: BaseEngine,
messages: list[dict],
Expand Down Expand Up @@ -1994,6 +2043,8 @@ def _prepare_responses_request(
f"tools={len(request.tools)}"
)

_validate_remote_media_urls(chat_request.messages)

messages, images, videos, audios = extract_multimodal_content(
chat_request.messages,
preserve_native_format=engine.preserve_native_tool_format,
Expand Down Expand Up @@ -4448,11 +4499,15 @@ async def create_chat_completion(request: ChatCompletionRequest, raw_request: Re

release_on_exit = True
try:
prepared = _prepare_chat_completion_invocation(
engine,
request,
effective_max_tokens,
)
try:
prepared = _prepare_chat_completion_invocation(
engine,
request,
effective_max_tokens,
)
except UnsafeRemoteURLError as exc:
tracker.finish(result="client_error")
_raise_remote_media_http_error(exc)

if request.stream:
response = StreamingResponse(
Expand Down Expand Up @@ -4634,15 +4689,21 @@ def _get_engine_tokenizer(engine) -> object | None:
)
async def create_response(request: ResponsesRequest, raw_request: Request):
"""Create a Responses API response."""
if request.stream:
return StreamingResponse(
_disconnect_guard(_stream_responses_request(request), raw_request),
media_type="text/event-stream",
try:
if request.stream:
chat_request = _responses_request_to_chat_request(request)
_validate_remote_media_urls(chat_request.messages)
return StreamingResponse(
_disconnect_guard(_stream_responses_request(request), raw_request),
media_type="text/event-stream",
)

response_object, _persisted_messages = await _run_responses_request(
request, raw_request
)
except UnsafeRemoteURLError as exc:
_raise_remote_media_http_error(exc)

response_object, _persisted_messages = await _run_responses_request(
request, raw_request
)
if response_object is None:
return Response(status_code=499)

Expand Down
Loading