fix is_local bugs
wnma3mz committed Jan 31, 2025
1 parent 50c6221 commit 8fd6bbf
Showing 7 changed files with 13 additions and 12 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -6,7 +6,7 @@
 
 1. install dependencies
 
-- for mlx (macos arm): `pip install -e ".[mlx]" && pip install -r requirements/mlx.txt`
+- for mlx (macos arm): `pip install -U -e ".[mlx]" && pip install -e git+https://github.com/wnma3mz/mlx_clip.git#egg=mlx_clip`
 - for nvidia: `pip install -e ".[torch]"`
 
 2. run server
3 changes: 1 addition & 2 deletions requirements/mlx.txt
@@ -1,4 +1,3 @@
 mlx==0.22.0
 mlx-lm==0.21.1
-mlx-vlm==0.1.12
--e git+https://github.com/wnma3mz/mlx_clip.git#egg=mlx_clip
+mlx-vlm==0.1.12
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
 
 # Platform-specific dependencies
 with open(root_dir / "requirements" / "mlx.txt") as fid:
-    mlx_requires = [l.strip() for l in fid.readlines() if not l.startswith("-e")]
+    mlx_requires = [l.strip() for l in fid.readlines()]
 
 with open(root_dir / "requirements" / "torch.txt") as fid:
     torch_requires = [l.strip() for l in fid.readlines()]
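For context on the setup.py hunk: the requirements files are read at build time and passed straight to setuptools, which is why the `-e git+...` line had to move out of requirements/mlx.txt and into the README, since `install_requires`/`extras_require` cannot express editable installs. A minimal sketch of the pattern, assuming the `mlx` extra named in the README; the `read_requirements` helper and its comment filtering are illustrative additions, not the repo's code:

from pathlib import Path

from setuptools import setup

root_dir = Path(__file__).parent


def read_requirements(path):
    # Keep only plain requirement specifiers: drop blank lines, comments,
    # and editable installs ("-e ..."), which setuptools cannot express.
    lines = (line.strip() for line in path.read_text().splitlines())
    return [line for line in lines if line and not line.startswith(("#", "-e"))]


mlx_requires = read_requirements(root_dir / "requirements" / "mlx.txt")

setup(
    name="tllm",  # assumed from the repository name
    extras_require={"mlx": mlx_requires},
)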
7 changes: 4 additions & 3 deletions tllm/entrypoints/api_server.py
@@ -151,7 +151,7 @@ async def update_model_url():
         for clients in host_list
     ]
     worker_rpc_manager.update_url(host_list)
-    master_url = args.hostname if is_local(args.hostname) else f"{args.hostname}:{args.grpc_port}"
+    master_url = args.hostname if args.is_local else f"{args.hostname}:{args.grpc_port}"
     await worker_rpc_manager.send_config(master_url, host_list)
     # Keep running health checks in the background; if a node goes down, work must be reassigned
     await worker_rpc_manager.start_health_check()
@@ -185,7 +185,7 @@ async def init_model_func(
 
 async def init_app(engine: AsyncEngine, args):
     global app, openai_serving_chat, image_serving
-    logger.info("args: %s", args)
+    logger.info("Master Args: %s", args)
     if args.is_image:
         image_serving = ImageServing(engine, args)
     else:
@@ -225,7 +225,8 @@ async def run_server(args) -> None:
 
     uvicorn_kwargs = {"host": ["::", "0.0.0.0"], "port": args.http_port, "timeout_graceful_shutdown": 5}
 
-    if is_local(args.hostname):
+    args.is_local = is_local(args.hostname)
+    if args.is_local:
         if os.path.isfile(MASTER_SOCKET_PATH):
             os.remove(MASTER_SOCKET_PATH)
         if os.path.isfile(CLIENT_SOCKET_PATH):
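The heart of the fix is in the last hunk above: `is_local(args.hostname)` is now evaluated once in `run_server` and cached as `args.is_local`, so later code such as `update_model_url` reuses that single decision instead of recomputing it. The repo's actual `is_local` is not shown in this diff; a purely hypothetical sketch of what such a check typically does:

import ipaddress
import socket


def is_local(hostname):
    # Hypothetical helper: treat the master as local when the hostname
    # resolves to a loopback address, so unix domain sockets can be used
    # instead of TCP. The real tllm implementation may differ.
    if hostname in ("localhost", "127.0.0.1", "::1"):
        return True
    try:
        addr = socket.gethostbyname(hostname)
    except socket.gaierror:
        return False
    return ipaddress.ip_address(addr).is_loopback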
4 changes: 2 additions & 2 deletions tllm/entrypoints/utils.py
@@ -137,7 +137,7 @@ def start(self):
             return
         self.process = Process(target=self.run_server)
         self.process.start()
-        self.logger.info(f"Started gRPC process (PID: {self.process.pid})")
+        # self.logger.info(f"Worker gRPC process (PID: {self.process.pid})")
 
     def shutdown(self):
         """Shut down the gRPC server process"""
@@ -148,7 +148,7 @@ def shutdown(self):
         self.process.join(timeout=5)
         if self.process.is_alive():
             self.process.kill()
-        self.logger.info("gRPC process stopped")
+        self.logger.info("Worker gRPC process stopped")
 
 
 async def serve_http(app: FastAPI, grpc_process, engine, master_server, **uvicorn_kwargs: Dict):
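Both utils.py hunks touch the same lifecycle: the worker's gRPC server runs in a child `multiprocessing.Process`, and shutdown joins it with a timeout before force-killing. A self-contained sketch of that pattern (the class name and `_run_server` body are illustrative, not the repo's):

import time
from multiprocessing import Process


def _run_server():
    # Stand-in for the real gRPC serving loop.
    while True:
        time.sleep(1)


class GRPCProcessManager:
    def __init__(self):
        self.process = None

    def start(self):
        if self.process is not None and self.process.is_alive():
            return
        self.process = Process(target=_run_server)
        self.process.start()

    def shutdown(self):
        if self.process is None:
            return
        self.process.terminate()      # ask the child to exit
        self.process.join(timeout=5)  # give it 5 seconds
        if self.process.is_alive():
            self.process.kill()       # force-kill, as in the diff


if __name__ == "__main__":
    mgr = GRPCProcessManager()
    mgr.start()
    mgr.shutdown()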
3 changes: 2 additions & 1 deletion tllm/grpc/master_service/master_server.py
@@ -30,12 +30,13 @@ async def stop(self):
             await self.server.stop(grace=5)
             await self.server.wait_for_termination()
         except (Exception, asyncio.CancelledError) as e:
-            print("master handler error", str(e))
+            self.logger.info("master handler error", str(e))
 
     async def Forward(
         self, request: schemas_pb2.ForwardRequest, context: grpc.ServicerContext
     ) -> schemas_pb2.ForwardResponse:
         """Handle the result returned by the last node"""
+        self.logger.info("master handler request")
         request_id = "-".join(x for x in list(request.uuid_list))
 
         try:
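A side note on the stdlib logging call introduced above: `logging` treats extra positional arguments as %-format args, so a message with no placeholder, as in `self.logger.info("master handler error", str(e))`, produces a "not all arguments converted" formatting error at emit time rather than the intended output. The conventional forms, for reference:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("master")

err = RuntimeError("boom")

# Lazy %-style formatting: interpolated only if the record is emitted.
logger.info("master handler error: %s", err)

# Eager f-string formatting, equivalent output.
logger.info(f"master handler error: {err}")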
4 changes: 2 additions & 2 deletions tllm/grpc/worker_service/worker_server.py
@@ -41,7 +41,7 @@ async def start(self, ip_addr_list: List[str], port: int = 50051):
         schemas_pb2_grpc.add_RPCServiceServicer_to_server(self, self.server)
         self.server.add_insecure_port(f"[::]:{port}")
         self.server.add_insecure_port(f"unix://{CLIENT_SOCKET_PATH}")
-        self.logger.info(f"Starting gRPC server on [::]:{port}")
+        self.logger.info(f"Starting Worker gRPC server on [::]:{port}")
         await self.server.start()
 
         self.http_client.is_running = True
@@ -174,7 +174,7 @@ async def run(args):
 
     client_id = f"test-{str(uuid.uuid4())[:8]}-{comm.rank}"
     rpc_servicer = WorkerServer(comm, logger, args.master_addr, client_id)
-    logger.info("args: %s", args)
+    logger.info("Worker Args: %s", args)
     try:
         await rpc_servicer.start(ip_addr_list, args.grpc_port)
     except Exception as e:
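The `start` hunk above binds the same gRPC server to both a TCP port and a unix domain socket, which pairs with the `is_local` logic: same-host traffic can bypass TCP entirely. A minimal runnable sketch of that dual binding with `grpc.aio` (servicer registration is stubbed out and the socket path is illustrative):

import asyncio

import grpc


async def serve(port=50051, socket_path="/tmp/tllm_client.sock"):
    server = grpc.aio.server()
    # A real server would register a servicer here, e.g.:
    # schemas_pb2_grpc.add_RPCServiceServicer_to_server(servicer, server)
    server.add_insecure_port(f"[::]:{port}")           # remote workers over TCP
    server.add_insecure_port(f"unix://{socket_path}")  # same-host clients over UDS
    await server.start()
    await server.wait_for_termination()


if __name__ == "__main__":
    asyncio.run(serve())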
