Skip to content

Commit e8f3c5d

Browse files
authored
3.x Update integration test to use normalized directory name for CUDA rather than versioned directory name. (aws#4331)
* Fix the upload_artifacts.sh shell script to work with both BSD-style and POSIX-style sed options
* Reference the CUDA directory in the integration test through its normalized linked name
1 parent 9f04907 commit e8f3c5d

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

scheduler_plugins/slurm/utils/upload_artifacts.sh

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ function usage {
1919
exit 1
2020
}
2121

22+
BUILD_OS=$(uname)
23+
if [ "${BUILD_OS}" != "Linux" ]; then
24+
SED_BAK_EXT="''"
25+
fi
26+
2227
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
2328
cd "${SCRIPT_DIR}/.."
2429

@@ -74,18 +79,18 @@ PLUGIN_ARTIFACTS_CHECKSUM=$(shasum --algorithm 256 "${PLUGIN_ARTIFACTS_ARCHIVE}"
7479
PLUGIN_DEFINITION_S3_URL="s3://${S3_BUCKET}${S3_BUCKET_PREFIX}/plugin_definition.yaml"
7580
GENERATED_PLUGIN_DEFINITION_PATH="${TMP}/plugin_template_plugin_definition.yaml"
7681
cp plugin_definition.yaml ${GENERATED_PLUGIN_DEFINITION_PATH}
77-
sed -i "s|<TEMPLATE_CHECKSUM>|${ADDITIONAL_CLUSTER_INFRASTRUCTURE_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
78-
sed -i "s|<ARTIFACTS_CHECKSUM>|${PLUGIN_ARTIFACTS_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
79-
sed -i "s|<BUCKET>|${S3_BUCKET}${S3_BUCKET_PREFIX}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
82+
sed -i $SED_BAK_EXT "s|<TEMPLATE_CHECKSUM>|${ADDITIONAL_CLUSTER_INFRASTRUCTURE_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
83+
sed -i $SED_BAK_EXT "s|<ARTIFACTS_CHECKSUM>|${PLUGIN_ARTIFACTS_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
84+
sed -i $SED_BAK_EXT "s|<BUCKET>|${S3_BUCKET}${S3_BUCKET_PREFIX}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
8085
echo "Generated plugin definition:" && cat ${GENERATED_PLUGIN_DEFINITION_PATH}
8186
echo "Uploading plugin_definition to ${PLUGIN_DEFINITION_S3_URL}"
8287
aws s3 cp --region "${AWS_REGION}" "${GENERATED_PLUGIN_DEFINITION_PATH}" "${PLUGIN_DEFINITION_S3_URL}"
8388

8489
GENERATED_CONFIG_PATH="${TMP}/slurm_plugin_cluster_config.yaml"
8590
cp examples/cluster_configuration.yaml ${GENERATED_CONFIG_PATH}
86-
sed -i "s|<PLUGIN_DEFINITION>|${PLUGIN_DEFINITION_S3_URL}|g" ${GENERATED_CONFIG_PATH}
91+
sed -i $SED_BAK_EXT "s|<PLUGIN_DEFINITION>|${PLUGIN_DEFINITION_S3_URL}|g" ${GENERATED_CONFIG_PATH}
8792
PLUGIN_DEFINITION_CHECKSUM=$(shasum --algorithm 256 "${GENERATED_PLUGIN_DEFINITION_PATH}" | cut -d' ' -f1)
88-
sed -i "s|<PLUGIN_DEFINITION_CHECKSUM>|${PLUGIN_DEFINITION_CHECKSUM}|g" ${GENERATED_CONFIG_PATH}
93+
sed -i $SED_BAK_EXT "s|<PLUGIN_DEFINITION_CHECKSUM>|${PLUGIN_DEFINITION_CHECKSUM}|g" ${GENERATED_CONFIG_PATH}
8994

9095
echo "Generated test cluster configuration in ${GENERATED_CONFIG_PATH}:"
9196
cat ${GENERATED_CONFIG_PATH}

tests/integration-tests/tests/efa/test_efa/test_efa/nccl_benchmarks/init_nccl_benchmarks.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ module load ${1}
77
NCCL_BENCHMARKS_VERSION='2.10.0'
88
NCCL_VERSION='2.7.8-1'
99
ML_REPO_PKG='nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb'
10-
CUDA_VERSION='11.4'
1110
OFI_NCCL_VERSION='1.1.1'
1211
MPI_HOME=$(which mpirun | awk -F '/bin' '{print $1}')
1312
NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80" # Arch for NVIDIA A100
@@ -32,6 +31,6 @@ wget https://github.com/aws/aws-ofi-nccl/archive/v${OFI_NCCL_VERSION}.tar.gz
3231
tar xvfz v${OFI_NCCL_VERSION}.tar.gz
3332
cd aws-ofi-nccl-${OFI_NCCL_VERSION}
3433
./autogen.sh
35-
./configure --with-libfabric=/opt/amazon/efa --with-cuda=/usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/ --with-nccl=/shared/openmpi/nccl-${NCCL_VERSION}/build/ --with-mpi=${MPI_HOME} --prefix /shared/openmpi/ofi-plugin
34+
./configure --with-libfabric=/opt/amazon/efa --with-cuda=/usr/local/cuda/targets/x86_64-linux/ --with-nccl=/shared/openmpi/nccl-${NCCL_VERSION}/build/ --with-mpi=${MPI_HOME} --prefix /shared/openmpi/ofi-plugin
3635
make
37-
make install
36+
make install

0 commit comments

Comments
 (0)