-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathdeploy_model.py
157 lines (120 loc) · 5 KB
/
deploy_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
# Usage: $ python deploy_model.py --model_name <model_name> --model_type <hf|onnx|trt>
# $ python deploy_model.py --model_name nyutron_readmission --model_type onnx
import os
import shutil
from shutil import copytree, ignore_patterns
import argparse
from jinja2 import Environment, FileSystemLoader
import json
from helpers import get_config
# Command-line interface for the deployment script. The description replaces the
# argparse tutorial boilerplate so that --help describes what this tool does.
parser = argparse.ArgumentParser(
    description='Build the Triton model repository entry (directories, model.py, config.pbtxt) for a model.')
# Name of the model; it selects the ./hf_models/<name> and ./onnx_models/<name> sources.
parser.add_argument('--model_name', type=str, default='nyutron_readmission', help='model name from our ModelHub')
# Model flavour; restricted to the three supported values.
parser.add_argument('--model_type', type=str, default='onnx', choices=['hf', 'onnx', 'trt'], help='model type/accelerator the way triton likes to name')
def build_directories(model_name, model_type):
    """
    Construct the underlying directory structure for the production model.

    Two things are created, relative to the current working directory:

    * ``./tritonmodelrepo/<model_name>_<model_type>/1/hf`` -- a copy of
      ``./hf_models/<model_name>``. ``*.bin`` files are skipped since the
      weights do not need to be moved. An existing
      ``<model_name>_<model_type>`` directory is deleted first.
    * ``./tritonmodelrepo/<model_name>_base/1`` -- receives a copy of
      ``./onnx_models/<model_name>/model.<model_type>``.

    Args:
        model_name (str): model name
        model_type (str): model type (also used as the source file extension)
    Returns:
        str: path of the ``<model_name>_<model_type>`` directory
    """
    print("LOADING {0} as {1} INTO NYUTRITON".format(model_name,model_type))

    #define directory paths
    # Use the function argument here, not the script-level ``args`` global, so
    # the function also works when imported and called from other code.
    src_model = "./hf_models/{0}".format(model_name)
    model_path = "./tritonmodelrepo/{0}_{1}".format(model_name,model_type)

    # copytree refuses to write into an existing destination, so start clean.
    if os.path.exists(model_path):
        shutil.rmtree(model_path)
    copytree(
        src = src_model,
        dst = model_path+"/1/hf",ignore=ignore_patterns('*.bin'))

    #separately need to copy the base onnx model into another directory named model_name_base
    src_onnx = "./onnx_models/{0}/model.{1}".format(model_name,model_type)
    dst_base = "./tritonmodelrepo/{0}_base/1".format(model_name)
    os.makedirs(dst_base, exist_ok=True)
    print("created folder : ", dst_base)
    shutil.copy(src = src_onnx,dst = dst_base)
    print("created deployment dir at:{0}".format(dst_base))
    return model_path
def render_template(template_environment, template_filename, context):
    """
    Look up ``template_filename`` in ``template_environment`` and render it.
    Args:
        template_environment: object exposing ``get_template(name)``
        template_filename (str): name of the template to load
        context: value handed to the template's ``render`` call
    Returns:
        whatever the template's ``render`` returns
    """
    template = template_environment.get_template(template_filename)
    rendered = template.render(context)
    return rendered
def create_modelpyfile(model_name, model_type, model_path):
    """
    Creates the model's .py file that must be named model.py and specify the nVidia
    business logic for the underlying model that should include tokenizer, model, and
    any other preprocessing steps.

    The file is rendered from ``templates/mdltemplate.py`` (located next to this
    script) and written to ``<model_path>/1/model.py``.
    Args:
        model_name (str): model name
        model_type (str): model type must be hf, onnx, trt
        model_path (str): path to model directory
    Returns:
        None
    """
    #Extract info from the model's config (per the original note, the HF config file)
    model_cfg = get_config(model_name)
    architecture = model_cfg['architectures'][0]
    print("Creating a {0} model.py".format(architecture))

    # Templates are resolved relative to this script, not the working directory.
    path = os.path.dirname(os.path.abspath(__file__))
    template_environment = Environment(
        autoescape=False,
        loader=FileSystemLoader(os.path.join(path, 'templates')),
        trim_blocks=False)

    fname = "{0}/1/model.py".format(model_path)
    context = {
        'model_name': model_name,
        'model_type': model_type,
        'model_architecture': architecture,
        'input_name': model_cfg['input_name'],
        'output_name': model_cfg['output_name'],
        'max_seq_length': model_cfg["max_position_embeddings"]
    }

    # Render BEFORE opening the destination: opening with 'w' truncates the
    # file, so a template error must not leave an empty model.py behind.
    model_script = render_template(
        template_environment,
        'mdltemplate.py',
        context)

    # Python reads source files as UTF-8 by default, so write it that way
    # instead of depending on the platform's locale encoding.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(model_script)
def create_modelcfgfile(model_name, model_type, model_path):
    """
    Creates the model's .pbtxt config file that must be named config.pbtxt and specify the nVidia
    triton config model.

    The file is rendered from ``templates/configtemplate.pbtxt`` (located next to
    this script) and written to ``<model_path>/config.pbtxt``.
    Args:
        model_name (str): model name
        model_type (str): model type must be hf, onnx, trt
        model_path (str): path to model directory
    Returns:
        None
    """
    #Extract info from the model's config (per the original note, the HF config file)
    model_cfg = get_config(model_name)
    print("Creating a {0} config.pbtxt".format(model_cfg['architectures'][0]))

    # Templates are resolved relative to this script, not the working directory.
    path = os.path.dirname(os.path.abspath(__file__))
    template_environment = Environment(
        autoescape=False,
        loader=FileSystemLoader(os.path.join(path, 'templates')),
        trim_blocks=False)

    fname = "{0}/config.pbtxt".format(model_path)
    context = {
        # The name passed to the template is "<model_name>_<model_type>".
        'model_name': '{0}_{1}'.format(model_name,model_type),
        'input_name': model_cfg['input_name'],
        'output_name': model_cfg['output_name'],
        'max_batch_size': model_cfg['max_batch_size']
    }

    # Render BEFORE opening the destination: opening with 'w' truncates the
    # file, so a template error must not leave an empty config.pbtxt behind.
    model_script = render_template(
        template_environment,
        'configtemplate.pbtxt',
        context)

    # Write with an explicit encoding rather than the platform's locale default.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(model_script)
########################################
if __name__ == "__main__":
    # Parse the command line, lay out the directories, then generate the two
    # files (model.py first, config.pbtxt second) inside the returned path.
    args = parser.parse_args()
    model_path = build_directories(model_name=args.model_name, model_type=args.model_type)
    for generate in (create_modelpyfile, create_modelcfgfile):
        generate(
            model_name=args.model_name,
            model_type=args.model_type,
            model_path=model_path)