12 changes: 12 additions & 0 deletions docs/guides/multimodal.md
@@ -162,6 +162,18 @@ curl http://localhost:8000/v1/chat/completions \
| Local file | `{"type": "video", "video": "/path/to/video.mp4"}` |
| Base64 | `{"type": "video_url", "video_url": {"url": "data:video/mp4;base64,..."}}` |

### Remote URL safety

Remote image, video, and audio URLs are validated before the initial fetch and
before each redirect hop. URLs that resolve to localhost, link-local, private,
or otherwise non-global addresses are rejected with a generic client error,
while detailed diagnostics remain in the server logs.

This validation does not pin the IP address used by the subsequent HTTP
connection. In environments where DNS rebinding or split-horizon DNS is in
scope, run vllm-mlx behind network egress controls or fetch media through a
trusted proxy that enforces the destination policy at connect time.
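
The project's own check is implemented by `_validate_url_safety` in
`vllm_mlx/models/mllm.py`. The following standard-library sketch illustrates
the general idea only (the helper name and exact rules here are illustrative,
not the project's implementation):

```python
import ipaddress
import socket
from urllib.parse import urlparse


def resolves_only_to_global_addresses(url: str) -> bool:
    """Illustrative check: True only if every address the host resolves to is globally routable."""
    host = urlparse(url).hostname
    if host is None:
        return False
    try:
        resolved = socket.getaddrinfo(host, None)
    except socket.gaierror:
        return False
    for _family, _type, _proto, _canonname, sockaddr in resolved:
        # Strip an IPv6 zone index such as "%eth0" before parsing.
        addr = ipaddress.ip_address(sockaddr[0].split("%")[0])
        # is_global is False for loopback, link-local (169.254.0.0/16), private ranges, etc.
        if not addr.is_global:
            return False
    return True
```

Under a rule of this kind, `http://169.254.169.254/latest/meta-data/` (the
link-local metadata address used in the tests below) fails the check, while a
host that resolves only to public addresses, such as `https://8.8.8.8/image.jpg`,
passes.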

## Python API

```python
11 changes: 11 additions & 0 deletions tests/test_mllm.py
@@ -170,6 +170,17 @@ def test_validate_url_safety_allows_public_ip(self):

_validate_url_safety("https://8.8.8.8/image.jpg")

def test_unsafe_remote_url_error_has_safe_public_message(self):
"""Public safety errors should not disclose resolved hosts or IPs."""
from vllm_mlx.models.mllm import UnsafeRemoteURLError, _validate_url_safety

with pytest.raises(UnsafeRemoteURLError) as exc_info:
_validate_url_safety("http://169.254.169.254/latest/meta-data/")

assert "169.254.169.254" in str(exc_info.value)
assert exc_info.value.public_message == "Remote media URL is not allowed"
assert "169.254.169.254" not in exc_info.value.public_message

def test_request_with_safe_redirects_blocks_unsafe_redirect(self, monkeypatch):
"""Test that redirect hops are validated before a second request."""
from vllm_mlx.models import mllm
44 changes: 44 additions & 0 deletions tests/test_responses_api.py
@@ -152,6 +152,50 @@ def test_responses_applies_server_default_chat_template_kwargs(self, client):
"enable_thinking": False
}

def test_streaming_responses_validates_remote_media_once(self, client, monkeypatch):
import vllm_mlx.server as srv

engine = _mock_engine()
engine._stream_outputs = [_stream_output("Hello there", finish_reason="stop")]
srv._engine = engine

validate_calls = []

def fake_validate(messages):
validate_calls.append(messages)

def fake_responses_to_chat(_request):
return srv.ChatCompletionRequest(
model="test-model",
messages=[
srv.Message(
role="user",
content=[
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {"url": "https://example.com/image.png"},
},
],
)
],
max_tokens=8,
stream=True,
)

monkeypatch.setattr(srv, "_validate_remote_media_urls", fake_validate)
monkeypatch.setattr(
srv, "_responses_request_to_chat_request", fake_responses_to_chat
)

resp = client.post(
"/v1/responses",
json={"model": "test-model", "input": "describe this", "stream": True},
)

assert resp.status_code == 200
assert len(validate_calls) == 1

def test_responses_request_kwargs_override_server_defaults(self, client):
import vllm_mlx.server as srv

124 changes: 124 additions & 0 deletions tests/test_server.py
@@ -2561,6 +2561,130 @@ def fake_extract(messages, preserve_native_format=False):
assert response.status_code == 200
assert extract_calls["count"] == 1

def test_chat_completion_sanitizes_remote_media_safety_errors(
self, client, monkeypatch
):
"""Unsafe remote media URLs should return a generic public error."""
import vllm_mlx.server as server

class FakeEngine:
model_name = "fake-mllm"
is_mllm = True
preserve_native_tool_format = False

async def chat(self, messages, **kwargs): # pragma: no cover
raise AssertionError("unsafe URL should fail during preparation")

async def fake_acquire(_raw_request, **_kwargs):
return FakeEngine()

async def fake_release(*_args, **_kwargs):
return None

monkeypatch.setattr(server, "_acquire_default_engine_for_request", fake_acquire)
monkeypatch.setattr(server, "_release_engine_for_request", fake_release)
monkeypatch.setattr(server, "_model_name", "test-model")
monkeypatch.setattr(server, "_default_timeout", 30.0)
monkeypatch.setattr(server, "_default_max_tokens", 128)
monkeypatch.setattr(server, "_api_key", None)
monkeypatch.setattr(
server,
"_rate_limiter",
server.RateLimiter(requests_per_minute=60, enabled=False),
)

response = client.post(
"/v1/chat/completions",
json={
"model": "test-model",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {
"url": "http://169.254.169.254/latest/meta-data/"
},
},
],
}
],
"max_tokens": 8,
},
)

assert response.status_code == 400
assert response.json()["detail"] == "Remote media URL is not allowed"
assert "169.254.169.254" not in response.text

def test_anthropic_message_sanitizes_remote_media_safety_errors(
self, client, monkeypatch
):
"""Anthropic preparation should sanitize URL-safety failures too."""
import vllm_mlx.server as server

class FakeEngine:
model_name = "fake-mllm"
is_mllm = True
preserve_native_tool_format = False

async def chat(self, messages, **kwargs): # pragma: no cover
raise AssertionError("unsafe URL should fail during preparation")

async def fake_acquire(_raw_request, **_kwargs):
return FakeEngine()

async def fake_release(*_args, **_kwargs):
return None

def fake_anthropic_to_openai(_anthropic_request):
return server.ChatCompletionRequest(
model="test-model",
messages=[
server.Message(
role="user",
content=[
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {
"url": "http://169.254.169.254/latest/meta-data/"
},
},
],
)
],
max_tokens=8,
)

monkeypatch.setattr(server, "_acquire_default_engine_for_request", fake_acquire)
monkeypatch.setattr(server, "_release_engine_for_request", fake_release)
monkeypatch.setattr(server, "anthropic_to_openai", fake_anthropic_to_openai)
monkeypatch.setattr(server, "_model_name", "test-model")
monkeypatch.setattr(server, "_default_timeout", 30.0)
monkeypatch.setattr(server, "_default_max_tokens", 128)
monkeypatch.setattr(server, "_api_key", None)
monkeypatch.setattr(
server,
"_rate_limiter",
server.RateLimiter(requests_per_minute=60, enabled=False),
)

response = client.post(
"/v1/messages",
json={
"model": "test-model",
"messages": [{"role": "user", "content": "describe this"}],
"max_tokens": 8,
},
)

assert response.status_code == 400
assert response.json()["detail"] == "Remote media URL is not allowed"
assert "169.254.169.254" not in response.text


class TestChatCompletionStreamingModeSwitching:
"""Endpoint-level regression tests for stream/non-stream mode switching."""
9 changes: 8 additions & 1 deletion vllm_mlx/models/mllm.py
@@ -122,7 +122,14 @@ class FileSizeExceededError(Exception):
class UnsafeRemoteURLError(ValueError):
"""Raised when a remote media URL targets an unsafe destination."""

pass
def __init__(
self,
message: str,
*,
public_message: str = "Remote media URL is not allowed",
) -> None:
super().__init__(message)
self.public_message = public_message


@dataclass
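
The tests above pin this behavior at the API boundary: a 400 response whose
`detail` carries only the generic `public_message`. As a purely illustrative
sketch of how such an error can be surfaced (the helper below is hypothetical,
not the actual vllm-mlx server code), a FastAPI handler might do:

```python
import logging

from fastapi import HTTPException

from vllm_mlx.models.mllm import UnsafeRemoteURLError, _validate_url_safety

logger = logging.getLogger(__name__)


def check_remote_media_url(url: str) -> None:
    """Hypothetical helper: reject unsafe remote media URLs with a sanitized client error."""
    try:
        _validate_url_safety(url)
    except UnsafeRemoteURLError as exc:
        # Detailed diagnostics (resolved host, IP, reason) stay in server logs ...
        logger.warning("Rejected remote media URL: %s", exc)
        # ... while clients only ever see the generic public message.
        raise HTTPException(status_code=400, detail=exc.public_message) from exc
```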
Expand Down