Skip to content

Commit 6817bcd

Browse files
Integ-tests: Add checks for EFS utils on Amazon Linux 2
EFS utils is only installed on Amazon Linux 2. EFS file systems are still mounted with basic NFS. test_multiple_efs is improved to unmount all EFS file systems and mount them again using EFS utils. Signed-off-by: Hanwen <[email protected]>
1 parent 9078f4b commit 6817bcd

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

tests/integration-tests/tests/storage/storage_common.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,10 @@ def check_fsx(
208208
assert_fsx_correctly_shared(scheduler_commands, remote_command_executor, mount_dir)
209209

210210

211+
def get_efs_ids(cluster, region):
    """Return the ids stored in the "EFSIds" CloudFormation output as a list of strings.

    The outputs are looked up for ``cluster.cfn_name`` in ``region``; the
    "EFSIds" value is split on commas.
    """
    stack_outputs = retrieve_cfn_outputs(cluster.cfn_name, region)
    efs_ids_output = stack_outputs.get("EFSIds")
    return efs_ids_output.split(",")
213+
214+
211215
def get_fsx_ids(cluster, region):
    """Return the ids stored in the "FSXIds" CloudFormation output as a list of strings.

    The outputs are looked up for ``cluster.cfn_name`` in ``region``; the
    "FSXIds" value is split on commas.
    """
    stack_outputs = retrieve_cfn_outputs(cluster.cfn_name, region)
    fsx_ids_output = stack_outputs.get("FSXIds")
    return fsx_ids_output.split(",")
213217

tests/integration-tests/tests/storage/test_efs.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
from remote_command_executor import RemoteCommandExecutor
1919
from troposphere import Ref, Template, ec2
2020
from troposphere.efs import MountTarget
21-
from utils import generate_stack_name, get_vpc_snakecase_value
21+
from utils import generate_stack_name, get_compute_nodes_instance_ips, get_vpc_snakecase_value
2222

2323
from tests.storage.storage_common import (
24+
get_efs_ids,
2425
test_efs_correctly_mounted,
2526
verify_directory_correctly_shared,
2627
write_file_into_efs,
@@ -65,10 +66,11 @@ def test_efs_same_az(region, pcluster_config_reader, clusters_factory, vpc_stack
6566
_test_efs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands)
6667

6768

68-
@pytest.mark.usefixtures("scheduler", "instance")
69+
@pytest.mark.usefixtures("instance")
6970
def test_multiple_efs(
7071
os,
7172
region,
73+
scheduler,
7274
efs_stack_factory,
7375
mount_target_stack_factory,
7476
pcluster_config_reader,
@@ -130,6 +132,12 @@ def test_multiple_efs(
130132

131133
run_benchmarks(remote_command_executor, scheduler_commands)
132134

135+
if os == "alinux2" and scheduler == "slurm":
136+
logging.info("Checking EFS utils on Amazon linux 2")
137+
_test_efs_utils(
138+
remote_command_executor, scheduler_commands, cluster, region, all_mount_dirs, get_efs_ids(cluster, region)
139+
)
140+
133141

134142
def _add_mount_targets(subnet_ids, efs_ids, security_group, template):
135143
subnet_response = boto3.client("ec2").describe_subnets(SubnetIds=subnet_ids)["Subnets"]
@@ -230,3 +238,26 @@ def _assert_subnet_az_relations(region, vpc_stack, expected_in_same_az):
230238
assert_that(head_node_subnet_az).is_equal_to(compute_subnet_az)
231239
else:
232240
assert_that(head_node_subnet_az).is_not_equal_to(compute_subnet_az)
241+
242+
243+
def _test_efs_utils(remote_command_executor, scheduler_commands, cluster, region, mount_dirs, efs_ids):
    """Remount every EFS file system through EFS utils and re-run the sharing/mount checks.

    Each directory in ``mount_dirs`` is unmounted on the head node and on all
    compute nodes, then mounted again with ``mount -t efs -o tls`` using the
    file system id at the same position in ``efs_ids``.

    :param remote_command_executor: executor used to run commands on the head node
    :param scheduler_commands: passed through to ``_test_efs_correctly_shared``
    :param cluster: cluster under test; ``cluster.name`` is used to look up compute node ips
    :param region: region used to look up compute node ips
    :param mount_dirs: mount directories, in the same order as ``efs_ids``
    :param efs_ids: EFS file system ids, one per entry of ``mount_dirs``
    """
    # Check the pairing before touching any mount: a mismatch should fail the test
    # while the file systems are still mounted, not after all of them were unmounted.
    assert_that(mount_dirs).is_length(len(efs_ids))

    # Head node first, then one executor per compute node, so every command below
    # runs on the head node before it runs on the compute nodes.
    node_executors = [remote_command_executor]
    node_executors.extend(
        RemoteCommandExecutor(cluster, compute_node_ip=compute_node_ip)
        for compute_node_ip in get_compute_nodes_instance_ips(cluster.name, region)
    )

    # Unmount all EFS from head node and compute nodes
    for mount_dir in mount_dirs:
        command = f"sudo umount {mount_dir}"
        for node_executor in node_executors:
            node_executor.run_remote_command(command)

    # Mount all EFS using EFS-utils
    for mount_dir, efs_id in zip(mount_dirs, efs_ids):
        command = f"sudo mount -t efs -o tls {efs_id}:/ {mount_dir}"
        for node_executor in node_executors:
            node_executor.run_remote_command(command)
        # Verified per file system, right after it has been remounted on every node.
        _test_efs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands)

    for mount_dir in mount_dirs:
        test_efs_correctly_mounted(remote_command_executor, mount_dir)

tests/integration-tests/utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,20 @@ def get_cluster_nodes_instance_ids(stack_name, region, instance_types=None, node
214214
raise
215215

216216

217+
def get_compute_nodes_instance_ips(stack_name, region):
    """Return the list of private IP addresses of the compute instances.

    Instances are retrieved with ``describe_cluster_instances`` filtered by node
    type "Compute"; the "PrivateIpAddress" field of each one is returned.

    :param stack_name: stack name passed to ``describe_cluster_instances``
    :param region: region passed to ``describe_cluster_instances``
    :raises Exception: any error raised during retrieval is logged and re-raised unchanged
    """
    try:
        instances = describe_cluster_instances(
            stack_name,
            region,
            filter_by_node_type="Compute",
        )
        return [instance["PrivateIpAddress"] for instance in instances]
    except Exception:
        logging.error("Failed retrieving instance ips for stack %s in region %s", stack_name, region)
        # Bare raise re-raises the active exception as-is, matching the sibling helpers.
        raise
229+
230+
217231
def describe_cluster_instances(
218232
stack_name,
219233
region,

0 commit comments

Comments
 (0)