12 changes: 12 additions & 0 deletions docs/guides/multimodal.md
@@ -162,6 +162,18 @@ curl http://localhost:8000/v1/chat/completions \
| Local file | `{"type": "video", "video": "/path/to/video.mp4"}` |
| Base64 | `{"type": "video_url", "video_url": {"url": "data:video/mp4;base64,..."}}` |

### Remote URL safety

Remote image, video, and audio URLs are validated before the initial fetch and
before each redirect hop. URLs that resolve to localhost, link-local, private,
or otherwise non-global addresses are rejected with a generic client error,
while detailed diagnostics remain in the server logs.

This validation does not pin the IP address used by the subsequent HTTP
connection. In environments where DNS rebinding or split-horizon DNS is in
scope, run vllm-mlx behind network egress controls or fetch media through a
trusted proxy that enforces the destination policy at connect time.
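
The project's own check is implemented by `_validate_url_safety` in
`vllm_mlx/models/mllm.py`. The following standard-library sketch illustrates
the general idea only (the helper name and exact rules here are illustrative,
not the project's implementation):

```python
import ipaddress
import socket
from urllib.parse import urlparse


def resolves_only_to_global_addresses(url: str) -> bool:
    """Illustrative check: True only if every address the host resolves to is globally routable."""
    host = urlparse(url).hostname
    if host is None:
        return False
    try:
        resolved = socket.getaddrinfo(host, None)
    except socket.gaierror:
        return False
    for _family, _type, _proto, _canonname, sockaddr in resolved:
        # Strip an IPv6 zone index such as "%eth0" before parsing.
        addr = ipaddress.ip_address(sockaddr[0].split("%")[0])
        # is_global is False for loopback, link-local (169.254.0.0/16), private ranges, etc.
        if not addr.is_global:
            return False
    return True
```

Under a rule of this kind, `http://169.254.169.254/latest/meta-data/` (the
link-local metadata address used in the tests below) fails the check, while a
host that resolves only to public addresses, such as `https://8.8.8.8/image.jpg`,
passes.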

## Python API

```python
11 changes: 11 additions & 0 deletions tests/test_mllm.py
@@ -170,6 +170,17 @@ def test_validate_url_safety_allows_public_ip(self):

_validate_url_safety("https://8.8.8.8/image.jpg")

def test_unsafe_remote_url_error_has_safe_public_message(self):
"""Public safety errors should not disclose resolved hosts or IPs."""
from vllm_mlx.models.mllm import UnsafeRemoteURLError, _validate_url_safety

with pytest.raises(UnsafeRemoteURLError) as exc_info:
_validate_url_safety("http://169.254.169.254/latest/meta-data/")

assert "169.254.169.254" in str(exc_info.value)
assert exc_info.value.public_message == "Remote media URL is not allowed"
assert "169.254.169.254" not in exc_info.value.public_message

def test_request_with_safe_redirects_blocks_unsafe_redirect(self, monkeypatch):
"""Test that redirect hops are validated before a second request."""
from vllm_mlx.models import mllm
44 changes: 44 additions & 0 deletions tests/test_responses_api.py
@@ -152,6 +152,50 @@ def test_responses_applies_server_default_chat_template_kwargs(self, client):
"enable_thinking": False
}

def test_streaming_responses_validates_remote_media_once(self, client, monkeypatch):
import vllm_mlx.server as srv

engine = _mock_engine()
engine._stream_outputs = [_stream_output("Hello there", finish_reason="stop")]
srv._engine = engine

validate_calls = []

def fake_validate(messages):
validate_calls.append(messages)

def fake_responses_to_chat(_request):
return srv.ChatCompletionRequest(
model="test-model",
messages=[
srv.Message(
role="user",
content=[
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {"url": "https://example.com/image.png"},
},
],
)
],
max_tokens=8,
stream=True,
)

monkeypatch.setattr(srv, "_validate_remote_media_urls", fake_validate)
monkeypatch.setattr(
srv, "_responses_request_to_chat_request", fake_responses_to_chat
)

resp = client.post(
"/v1/responses",
json={"model": "test-model", "input": "describe this", "stream": True},
)

assert resp.status_code == 200
assert len(validate_calls) == 1

def test_responses_request_kwargs_override_server_defaults(self, client):
import vllm_mlx.server as srv

124 changes: 124 additions & 0 deletions tests/test_server.py
@@ -2561,6 +2561,130 @@ def fake_extract(messages, preserve_native_format=False):
assert response.status_code == 200
assert extract_calls["count"] == 1

def test_chat_completion_sanitizes_remote_media_safety_errors(
self, client, monkeypatch
):
"""Unsafe remote media URLs should return a generic public error."""
import vllm_mlx.server as server

class FakeEngine:
model_name = "fake-mllm"
is_mllm = True
preserve_native_tool_format = False

async def chat(self, messages, **kwargs): # pragma: no cover
raise AssertionError("unsafe URL should fail during preparation")

async def fake_acquire(_raw_request, **_kwargs):
return FakeEngine()

async def fake_release(*_args, **_kwargs):
return None

monkeypatch.setattr(server, "_acquire_default_engine_for_request", fake_acquire)
monkeypatch.setattr(server, "_release_engine_for_request", fake_release)
monkeypatch.setattr(server, "_model_name", "test-model")
monkeypatch.setattr(server, "_default_timeout", 30.0)
monkeypatch.setattr(server, "_default_max_tokens", 128)
monkeypatch.setattr(server, "_api_key", None)
monkeypatch.setattr(
server,
"_rate_limiter",
server.RateLimiter(requests_per_minute=60, enabled=False),
)

response = client.post(
"/v1/chat/completions",
json={
"model": "test-model",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {
"url": "http://169.254.169.254/latest/meta-data/"
},
},
],
}
],
"max_tokens": 8,
},
)

assert response.status_code == 400
assert response.json()["detail"] == "Remote media URL is not allowed"
assert "169.254.169.254" not in response.text

def test_anthropic_message_sanitizes_remote_media_safety_errors(
self, client, monkeypatch
):
"""Anthropic preparation should sanitize URL-safety failures too."""
import vllm_mlx.server as server

class FakeEngine:
model_name = "fake-mllm"
is_mllm = True
preserve_native_tool_format = False

async def chat(self, messages, **kwargs): # pragma: no cover
raise AssertionError("unsafe URL should fail during preparation")

async def fake_acquire(_raw_request, **_kwargs):
return FakeEngine()

async def fake_release(*_args, **_kwargs):
return None

def fake_anthropic_to_openai(_anthropic_request):
return server.ChatCompletionRequest(
model="test-model",
messages=[
server.Message(
role="user",
content=[
{"type": "text", "text": "describe this"},
{
"type": "image_url",
"image_url": {
"url": "http://169.254.169.254/latest/meta-data/"
},
},
],
)
],
max_tokens=8,
)

monkeypatch.setattr(server, "_acquire_default_engine_for_request", fake_acquire)
monkeypatch.setattr(server, "_release_engine_for_request", fake_release)
monkeypatch.setattr(server, "anthropic_to_openai", fake_anthropic_to_openai)
monkeypatch.setattr(server, "_model_name", "test-model")
monkeypatch.setattr(server, "_default_timeout", 30.0)
monkeypatch.setattr(server, "_default_max_tokens", 128)
monkeypatch.setattr(server, "_api_key", None)
monkeypatch.setattr(
server,
"_rate_limiter",
server.RateLimiter(requests_per_minute=60, enabled=False),
)

response = client.post(
"/v1/messages",
json={
"model": "test-model",
"messages": [{"role": "user", "content": "describe this"}],
"max_tokens": 8,
},
)

assert response.status_code == 400
assert response.json()["detail"] == "Remote media URL is not allowed"
assert "169.254.169.254" not in response.text


class TestChatCompletionStreamingModeSwitching:
"""Endpoint-level regression tests for stream/non-stream mode switching."""
9 changes: 8 additions & 1 deletion vllm_mlx/models/mllm.py
@@ -122,7 +122,14 @@ class FileSizeExceededError(Exception):
class UnsafeRemoteURLError(ValueError):
"""Raised when a remote media URL targets an unsafe destination."""

pass
def __init__(
self,
message: str,
*,
public_message: str = "Remote media URL is not allowed",
) -> None:
super().__init__(message)
self.public_message = public_message


@dataclass
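
The tests above pin this behavior at the API boundary: a 400 response whose
`detail` carries only the generic `public_message`. As a purely illustrative
sketch of how such an error can be surfaced (the helper below is hypothetical,
not the actual vllm-mlx server code), a FastAPI handler might do:

```python
import logging

from fastapi import HTTPException

from vllm_mlx.models.mllm import UnsafeRemoteURLError, _validate_url_safety

logger = logging.getLogger(__name__)


def check_remote_media_url(url: str) -> None:
    """Hypothetical helper: reject unsafe remote media URLs with a sanitized client error."""
    try:
        _validate_url_safety(url)
    except UnsafeRemoteURLError as exc:
        # Detailed diagnostics (resolved host, IP, reason) stay in server logs ...
        logger.warning("Rejected remote media URL: %s", exc)
        # ... while clients only ever see the generic public message.
        raise HTTPException(status_code=400, detail=exc.public_message) from exc
```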
Expand Down