@@ -201,7 +201,7 @@ def _dump_logs(instance_id):
201201 log .warning ("Failed while dumping logs to %s with exception %s." , filename , e )
202202
203203
204- def _terminate_if_down (scheduler_module , config , instance_id , max_wait ):
204+ def _terminate_if_down (scheduler_module , config , asg_name , instance_id , max_wait ):
205205 """Check that node is correctly attached to scheduler otherwise terminate the instance."""
206206 asg_client = boto3 .client ("autoscaling" , region_name = config .region , config = config .proxy_config )
207207
@@ -219,7 +219,7 @@ def _poll_wait_for_node_ready():
219219 _dump_logs (instance_id )
220220 # jobwatcher already has the logic to request a new host in case of down nodes,
221221 # which is done in order to speed up cluster recovery.
222- _self_terminate (asg_client , instance_id , decrement_desired = True )
222+ _self_terminate (asg_client , instance_id , decrement_desired = not _maintain_size ( asg_name , asg_client ) )
223223
224224
225225@retry (
@@ -346,7 +346,7 @@ def _poll_instance_status(config, scheduler_module, asg_name, hostname, instance
346346 :param instance_type: current instance type
347347 """
348348 _wait_for_stack_ready (config .stack_name , config .region , config .proxy_config )
349- _terminate_if_down (scheduler_module , config , instance_id , INITIAL_TERMINATE_TIMEOUT )
349+ _terminate_if_down (scheduler_module , config , asg_name , instance_id , INITIAL_TERMINATE_TIMEOUT )
350350
351351 idletime = _init_idletime ()
352352 instance_properties = get_instance_properties (config .region , config .proxy_config , instance_type )
@@ -358,7 +358,7 @@ def _poll_instance_status(config, scheduler_module, asg_name, hostname, instance
358358 max_cluster_size = _refresh_cluster_properties (config .region , config .proxy_config , asg_name )
359359
360360 _store_idletime (idletime )
361- _terminate_if_down (scheduler_module , config , instance_id , TERMINATE_TIMEOUT )
361+ _terminate_if_down (scheduler_module , config , asg_name , instance_id , TERMINATE_TIMEOUT )
362362
363363 has_jobs = _has_jobs (scheduler_module , hostname )
364364 if has_jobs :
0 commit comments