This repository has been archived by the owner on May 30, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeploy.py
50 lines (44 loc) · 1.62 KB
/
deploy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import ray
from ray import serve
import multiprocessing as mp
from augmentation.deploy import AugmentationDeployment
from preprocessing.deploy import PreprocessingDeployment
if __name__ == "__main__":
    # Connect to Ray (address="auto", namespace "serve") and start a detached
    # Ray Serve instance whose HTTP endpoint listens on 0.0.0.0:8000.
    ray.init(address="auto", namespace="serve")
    serve.start(detached=True, http_options={"host": "0.0.0.0", "port": 8000})

    # Toggles selecting which deployments this script rolls out.
    deploy_augmentation: bool = True
    deploy_preprocessing: bool = True

    # CPU budget
    # ----------
    # * More than 2 cores: augmentation gets one replica with 1 CPU, one core
    #   is left for the operating system, and every remaining core hosts one
    #   preprocessing replica with 1 CPU each.
    # * 1 or 2 cores: augmentation and preprocessing each get a single replica
    #   that requests half a CPU (0.5), so both replicas fit on a one-core
    #   instance. Requesting a full CPU for each would need 2 logical CPUs
    #   and leave the second deployment unschedulable there.
    #
    # NOTE(review): mp.cpu_count() reports the cores of the machine running
    # this script; this presumably matches the Ray node that hosts the
    # replicas (single-node setup) -- confirm for multi-node clusters.
    cpu_count = mp.cpu_count()
    has_spare_cores = cpu_count > 2
    cpus_per_replica = 1 if has_spare_cores else 0.5
    preprocessing_replicas = cpu_count - 2 if has_spare_cores else 1

    if deploy_augmentation:
        AugmentationDeployment.options(
            route_prefix="/augmentation",
            num_replicas=1,
            max_concurrent_queries=1000,
            ray_actor_options={
                "num_cpus": cpus_per_replica,
                "num_gpus": 0,
            },
        ).deploy(use_gpu=False)

    if deploy_preprocessing:
        PreprocessingDeployment.options(
            route_prefix="/preprocessing",
            num_replicas=preprocessing_replicas,
            max_concurrent_queries=1000,
            ray_actor_options={
                "num_cpus": cpus_per_replica,
                "num_gpus": 0,
            },
        ).deploy(use_gpu=False)