@@ -567,11 +567,11 @@ def _create_slurm_sbatch_script(
567567
568568 # wait for the server to initialize
569569 health_path = cfg .deployment .get ("health_check_path" , "/health" )
570- # Only check MASTER_IP if not multiinstance, otherwise check all IPs
570+ # For multi-instance, check all node IPs; for a single instance, check localhost
571571 if cfg .deployment .get ("multiple_instances" , False ):
572572 ip_list = '"${NODES_IPS_ARRAY[@]}"'
573573 else :
574- ip_list = '"$MASTER_IP "'
574+ ip_list = '"127.0.0.1 "'
575575 s += _get_wait_for_server_handler (
576576 ip_list ,
577577 cfg .deployment .port ,
@@ -1097,8 +1097,13 @@ def _generate_haproxy_config_with_placeholders(cfg):
10971097 """Generate HAProxy configuration with placeholder IPs using Jinja template."""
10981098 # Set up Jinja environment
10991099 template_dir = Path (__file__ ).parent
1100+ template_path = template_dir / "haproxy.cfg.template"
1101+
1102+ if not template_path .exists ():
1103+ raise FileNotFoundError (f"HAProxy template not found: { template_path } " )
1104+
11001105 env = Environment (loader = FileSystemLoader (template_dir ))
1101- template = env .get_template ("haproxy.cfg.j2 " )
1106+ template = env .get_template ("haproxy.cfg.template " )
11021107
11031108 # Prepare template data with placeholder IPs - use actual number of nodes
11041109 num_nodes = cfg .execution .num_nodes
@@ -1127,8 +1132,13 @@ def _generate_haproxy_config(cfg, nodes_ips):
11271132 """Generate HAProxy configuration using Jinja template."""
11281133 # Set up Jinja environment
11291134 template_dir = Path (__file__ ).parent
1135+ template_path = template_dir / "haproxy.cfg.template"
1136+
1137+ if not template_path .exists ():
1138+ raise FileNotFoundError (f"HAProxy template not found: { template_path } " )
1139+
11301140 env = Environment (loader = FileSystemLoader (template_dir ))
1131- template = env .get_template ("haproxy.cfg.j2 " )
1141+ template = env .get_template ("haproxy.cfg.template " )
11321142
11331143 # Prepare template data
11341144 nodes = []
@@ -1229,7 +1239,7 @@ def _get_wait_for_server_handler(
12291239
12301240
12311241def _get_proxy_server_srun_command (cfg , remote_task_subdir ):
1232- """Generate HAProxy proxy server srun command."""
1242+ """Generate HAProxy proxy server srun command using template-based config ."""
12331243 s = ""
12341244 s += "# HAProxy load balancer\n "
12351245 s += "# Copy template to config file (important for restarts)\n "
0 commit comments