4
4
5
5
6
6
import json
7
+ import re
7
8
import shlex
8
9
import threading
9
10
from datetime import datetime , timedelta
@@ -732,28 +733,18 @@ def _create_deployment(
732
733
f"Aqua model deployment { deployment_id } created for model { aqua_model_id } . Work request Id is { deployment .dsc_model_deployment .workflow_req_id } "
733
734
)
734
735
status_list = []
735
- progress_thread_1 = threading .Thread (
736
- target = deployment .watch ,
737
- args = (status_list ),
738
- daemon = True ,
739
- )
740
- progress_thread_1 .start ()
741
736
742
- progress_thread_2 = threading .Thread (
737
+ progress_thread = threading .Thread (
743
738
target = self .get_deployment_status ,
744
739
args = (
745
740
deployment_id ,
746
741
deployment .dsc_model_deployment .workflow_req_id ,
747
742
model_type ,
748
743
model_name ,
749
- status_list ,
750
744
),
751
745
daemon = True ,
752
746
)
753
- progress_thread_2 .start ()
754
-
755
- progress_thread_1 .join ()
756
- progress_thread_2 .join ()
747
+ progress_thread .start ()
757
748
758
749
# we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
759
750
telemetry_kwargs = {"ocid" : get_ocid_substring (deployment_id , key_len = 8 )}
@@ -1245,11 +1236,11 @@ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
1245
1236
1246
1237
def get_deployment_status (
1247
1238
self ,
1239
+ deployment ,
1248
1240
model_deployment_id : str ,
1249
1241
work_request_id : str ,
1250
1242
model_type : str ,
1251
1243
model_name : str ,
1252
- status_list : List [str ] = [],
1253
1244
) -> None :
1254
1245
"""Waits for the data science model deployment to be completed and log its status in telemetry.
1255
1246
@@ -1262,17 +1253,14 @@ def get_deployment_status(
1262
1253
The work request Id of the model deployment.
1263
1254
model_type: str
1264
1255
The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`.
1265
- status_list: List[str]
1266
- The list of status frmo streams the access and/or predict logs of model deployment.
1267
1256
1268
1257
Returns
1269
1258
-------
1270
1259
AquaDeployment
1271
1260
An Aqua deployment instance.
1272
1261
"""
1273
-
1274
1262
ocid = get_ocid_substring (model_deployment_id , key_len = 8 )
1275
- telemetry_kwargs = { "ocid" : ocid , "model_name" : model_name }
1263
+ status_list : List [ str ] = []
1276
1264
1277
1265
data_science_work_request : DataScienceWorkRequest = DataScienceWorkRequest (
1278
1266
work_request_id
@@ -1284,25 +1272,104 @@ def get_deployment_status(
1284
1272
max_wait_time = DEFAULT_WAIT_TIME ,
1285
1273
poll_interval = DEFAULT_POLL_INTERVAL ,
1286
1274
)
1287
- except Exception :
1275
+ predict_logs = deployment .tail_logs ("predict" )
1276
+ access_logs = deployment .tail_logs ("access" )
1277
+
1278
+ status = ""
1279
+ if access_logs and len (access_logs ) > 0 :
1280
+ print ("access log list ############################" )
1281
+ print (access_logs )
1282
+ status = access_logs [0 ]["message" ]
1283
+
1284
+ if predict_logs and len (predict_logs ) > 0 :
1285
+ print ("predict_logs ############################" )
1286
+ print (predict_logs )
1287
+ status += predict_logs [0 ]["message" ]
1288
+
1289
+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1290
+ telemetry_kwargs = {
1291
+ "ocid" : ocid ,
1292
+ "model_name" : model_name ,
1293
+ "status" : status ,
1294
+ }
1295
+ print (telemetry_kwargs )
1296
+ print ("############################" )
1297
+
1298
+ self .telemetry .record_event (
1299
+ category = f"aqua/{ model_type } /deployment/status" ,
1300
+ action = "LAST_LOG" ,
1301
+ # detail=error_str,
1302
+ ** telemetry_kwargs ,
1303
+ )
1304
+
1305
+ except Exception as e :
1288
1306
if data_science_work_request ._error_message :
1289
1307
error_str = ""
1290
1308
for error in data_science_work_request ._error_message :
1291
1309
error_str = error_str + " " + error .message
1292
1310
1293
1311
status = ""
1294
- if len (status_list ) > 0 :
1295
- status = status_list [- 1 ]
1296
-
1297
- telemetry_kwargs ["status" ] = status
1312
+ predict_logs = deployment .tail_logs ("predict" )
1313
+ access_logs = deployment .tail_logs ("access" )
1314
+ if access_logs and len (access_logs ) > 0 :
1315
+ print (access_logs )
1316
+ status = access_logs [0 ]["message" ]
1317
+
1318
+ if predict_logs and len (predict_logs ) > 0 :
1319
+ print ("predict_logs ############################" )
1320
+ print (predict_logs )
1321
+ status += predict_logs [0 ]["message" ]
1322
+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1323
+ error_str = re .sub (r"[^a-zA-Z0-9]" , "" , error_str )
1324
+ telemetry_kwargs = {
1325
+ "ocid" : ocid ,
1326
+ "model_name" : model_name ,
1327
+ "status" : error_str + " " + status ,
1328
+ }
1329
+ print (telemetry_kwargs )
1330
+ print ("############################" )
1298
1331
1299
1332
self .telemetry .record_event (
1300
1333
category = f"aqua/{ model_type } /deployment/status" ,
1301
1334
action = "FAILED" ,
1302
1335
detail = error_str ,
1303
1336
** telemetry_kwargs ,
1304
1337
)
1338
+ else :
1339
+ print (str (e ))
1340
+ status = str (e )
1341
+ predict_logs = deployment .tail_logs ("predict" )
1342
+ access_logs = deployment .tail_logs ("access" )
1343
+ if access_logs and len (access_logs ) > 0 :
1344
+ print ("access log list ############################" )
1345
+ print (access_logs )
1346
+ status = access_logs [0 ]["message" ]
1347
+
1348
+ if predict_logs and len (predict_logs ) > 0 :
1349
+ print ("predict_logs ############################" )
1350
+ print (predict_logs )
1351
+ status += predict_logs [0 ]["message" ]
1352
+
1353
+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1354
+ error_str = re .sub (r"[^a-zA-Z0-9]" , "" , error_str )
1355
+
1356
+ telemetry_kwargs = {
1357
+ "ocid" : ocid ,
1358
+ "model_name" : model_name ,
1359
+ "status" : error_str + " " + status ,
1360
+ }
1361
+ print (telemetry_kwargs )
1362
+ print ("############################" )
1363
+
1364
+ self .telemetry .record_event (
1365
+ category = f"aqua/{ model_type } /deployment/status" ,
1366
+ action = "FAILED" ,
1367
+ # detail=error_str,
1368
+ ** telemetry_kwargs ,
1369
+ )
1370
+
1305
1371
else :
1372
+ telemetry_kwargs = {"ocid" : ocid , "model_name" : model_name }
1306
1373
self .telemetry .record_event_async (
1307
1374
category = f"aqua/{ model_type } /deployment/status" ,
1308
1375
action = "SUCCEEDED" ,
0 commit comments