Skip to content

Commit dabe667

Browse files
authored
Merge pull request #46 from gkumbhat/update_tgis_adapter
📦 Update tgis adapter to 0.6.3
2 parents d2bf3b0 + 99b1574 commit dabe667

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ classifiers = [
1515
dependencies = [
1616
"orjson>=3.10.16,<3.11",
1717
"vllm @ git+https://github.com/vllm-project/vllm@v0.7.3 ; sys_platform == 'darwin'",
18+
# NOTE: vllm-tgis-adapter does not currently support vLLM 0.8.2; vllm-detector-adapter
19+
# itself does work with higher versions of vLLM.
1820
"vllm>=0.7.3,<0.7.4 ; sys_platform != 'darwin'",
1921
]
2022

2123
[project.optional-dependencies]
2224
vllm-tgis-adapter = [
23-
"vllm-tgis-adapter>=0.6.2,<0.6.3"
25+
"vllm-tgis-adapter>=0.6.3,<0.6.4"
2426
]
2527

2628
## Dev Extra Sets ##

vllm_detector_adapter/start_with_tgis_adapter.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import asyncio
1414
import contextlib
1515
import importlib.util
16+
import inspect
1617
import os
1718
import traceback
1819

@@ -24,6 +25,8 @@
2425
import uvloop
2526

2627
if TYPE_CHECKING:
28+
import socket
29+
2730
from vllm.engine.async_llm_engine import AsyncLLMEngine
2831
from vllm.engine.protocol import AsyncEngineClient
2932

@@ -47,6 +50,7 @@
4750
else:
4851
# Third Party
4952
from vllm_tgis_adapter.grpc import run_grpc_server
53+
from vllm_tgis_adapter.http import build_http_server
5054
from vllm_tgis_adapter.tgis_utils.args import (
5155
EnvVarArgumentParser,
5256
add_tgis_args,
@@ -60,6 +64,7 @@
6064
async def run_http_server(
6165
args: argparse.Namespace,
6266
engine: AsyncLLMEngine | AsyncEngineClient,
67+
sock: socket.socket | None = None,
6368
**uvicorn_kwargs, # noqa: ANN003
6469
) -> None:
6570
# modified copy of vllm.entrypoints.openai.api_server.run_server that
@@ -81,6 +86,10 @@ async def run_http_server(
8186
}
8287
serve_kwargs.update(uvicorn_kwargs)
8388

89+
# should only be used in versions of vllm >= 0.7.3
90+
if "sock" in inspect.getfullargspec(serve_http).args:
91+
serve_kwargs["sock"] = sock
92+
8493
shutdown_coro = await serve_http(app, **serve_kwargs)
8594

8695
# launcher.serve_http returns a shutdown coroutine to await
@@ -94,19 +103,27 @@ async def start_servers(args: argparse.Namespace) -> None:
94103
"""
95104
loop = asyncio.get_running_loop()
96105

106+
# workaround to make sure that we bind the port before the engine is set up.
107+
# This avoids race conditions with ray.
108+
# see https://github.com/vllm-project/vllm/issues/8204
109+
sock_addr = (args.host or "", args.port)
110+
sock = api_server.create_server_socket(sock_addr)
111+
97112
tasks: list[asyncio.Task] = []
98113
async with api_server.build_async_engine_client(args) as engine:
99114
add_logging_wrappers(engine)
100115

116+
vllm_server = await build_http_server(args, engine)
117+
101118
http_server_task = loop.create_task(
102-
run_http_server(args, engine),
119+
run_http_server(args, engine, sock),
103120
name="http_server",
104121
)
105122
# The http server task will catch interrupt signals for us
106123
tasks.append(http_server_task)
107124

108125
grpc_server_task = loop.create_task(
109-
run_grpc_server(args, engine),
126+
run_grpc_server(args, engine, vllm_server),
110127
name="grpc_server",
111128
)
112129
tasks.append(grpc_server_task)

0 commit comments

Comments
 (0)