diff --git a/orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.6.json b/orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.7.json similarity index 100% rename from orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.6.json rename to orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.7.json diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml index 352ee19a49108..5dac8fc9cda63 100644 --- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml @@ -36,7 +36,7 @@ variables: - name: render value: 109 - name: RocmVersion - value: 5.6 + value: 5.7 jobs: - job: Linux_Build @@ -99,6 +99,7 @@ jobs: ccache -s; \ python tools/ci_build/build.py \ --config Release \ + --enable_training \ --cmake_extra_defines \ CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \ onnxruntime_BUILD_KERNEL_EXPLORER=OFF \ @@ -181,7 +182,7 @@ jobs: /bin/bash -c " set -ex; \ cd /build/Release && xargs -a /build/Release/perms.txt chmod a+x; \ - bash /onnxruntime_src/tools/ci_build/github/pai/migraphx_test_launcher.sh" + bash /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh" workingDirectory: $(Build.SourcesDirectory) displayName: 'Run onnxruntime unit tests' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml index 8dd1f0c5c6461..ed84b514fbbcf 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml @@ -25,7 +25,7 @@ variables: - name: render value: 109 - name: RocmVersion - value: 5.6 + value: 5.7 - name: BuildConfig value: Release @@ -98,7 +98,7 @@ jobs: /bin/bash -c " set -ex; \ ccache -s; \ - /opt/python/cp38-cp38/bin/python3 tools/ci_build/build.py \ + /opt/python/cp39-cp39/bin/python3 tools/ci_build/build.py \ --config $(BuildConfig) \ --enable_training \ --mpi_home /opt/ompi \ diff --git a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile index 7d2c818d08920..8a67692ae598b 100644 --- a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile @@ -1,7 +1,7 @@ # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete FROM ubuntu:22.04 -ARG ROCM_VERSION=5.6 +ARG ROCM_VERSION=5.7 ARG AMDGPU_VERSION=${ROCM_VERSION} ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' @@ -71,12 +71,15 @@ RUN pip install cryptography==41.0.0 # Create migraphx-ci environment ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci ENV CONDA_DEFAULT_ENV migraphx-ci -RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8 +RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH} # Enable migraphx-ci environment SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"] +# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found +RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6 + # Install migraphx RUN apt update && apt install -y migraphx diff --git a/tools/ci_build/github/pai/migraphx-excluded-tests.txt b/tools/ci_build/github/pai/migraphx-excluded-tests.txt deleted file mode 100644 index 5e38f982d7de3..0000000000000 --- a/tools/ci_build/github/pai/migraphx-excluded-tests.txt +++ /dev/null @@ -1,2 +0,0 @@ -GatherOpTest.Gather_invalid_index_cpu -Scatter.InvalidIndex diff --git a/tools/ci_build/github/pai/migraphx_test_launcher.sh b/tools/ci_build/github/pai/migraphx_test_launcher.sh deleted file mode 100755 index c2d593b6417c2..0000000000000 --- a/tools/ci_build/github/pai/migraphx_test_launcher.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -build_dir=${1:-"."} -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -echo "Warning: The following tests are EXCLUDED on MIGraphX agent:" -gtest_filter="-" -while read line; do - gtest_filter="$gtest_filter:$line" - echo "$line" -done <$script_dir/migraphx-excluded-tests.txt -echo "" - -echo "Running ./onnxruntime_test_all .." -$build_dir/onnxruntime_test_all --gtest_filter=$gtest_filter diff --git a/tools/ci_build/github/pai/pai-excluded-tests.txt b/tools/ci_build/github/pai/pai-excluded-tests.txt index b446dac2034de..845d36b71d215 100644 --- a/tools/ci_build/github/pai/pai-excluded-tests.txt +++ b/tools/ci_build/github/pai/pai-excluded-tests.txt @@ -1,6 +1,3 @@ -CudaKernelTest.NegativeLogLikelihoodLoss_TinySizeTensor -CudaKernelTest.NegativeLogLikelihoodLoss_SmallSizeTensor -CudaKernelTest.NegativeLogLikelihoodLoss_MediumSizeTensor CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16 CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo CudaKernelTest.SoftmaxGrad_LargeTensor_AllAxis_Float16 @@ -10,26 +7,6 @@ CudaKernelTest.LogSoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16 CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo ReductionOpTest.ReductionVariationTest -ReductionOpTest.ReduceLogSumExp_default_axes_keepdims_double -ReductionOpTest.ReduceLogSumExp_default_axes_do_not_keep_dims_double -ReductionOpTest.ReduceLogSumExp_do_not_keepdims_double -ReductionOpTest.ReduceLogSumExp_do_not_keepdims_2_double -ReductionOpTest.ReduceLogSumExp_keepdims_double -ReductionOpTest.ReduceLogSumExp_double -ReductionOpTest.ReduceMax_double -ReductionOpTest.ReduceMean_default_axes_keepdims_double -ReductionOpTest.ReduceMean_default_axes_do_not_keep_dims_double -ReductionOpTest.ReduceMean_do_not_keepdims_double -ReductionOpTest.ReduceMean_do_not_keepdims_2_double -ReductionOpTest.ReduceMean_keepdims_double -ReductionOpTest.ReduceMean_double -ReductionOpTest.ReduceMean0DTensor_double -ReductionOpTest.ReduceMin_double -ReductionOpTest.ReduceSum_double -ReductionOpTest.ReduceSumSquare_double -ReductionOpTest.ReduceInfMax_double -ReductionOpTest.ReduceInfMin_double -ReductionOpTest.ReduceInfLogSumExp_double GatherOpTest.Gather_invalid_index_cpu Scatter.InvalidIndex GradientCheckerTest.AddGrad diff --git a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile index 89a7fe09c527f..32bb99f08812e 100644 --- a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile @@ -1,7 +1,7 @@ # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete FROM ubuntu:22.04 -ARG ROCM_VERSION=5.6 +ARG ROCM_VERSION=5.7 ARG AMDGPU_VERSION=${ROCM_VERSION} ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' @@ -64,7 +64,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 # Create rocm-ci environment ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci ENV CONDA_DEFAULT_ENV rocm-ci -RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8 +RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH} # Conda base patch @@ -77,7 +77,7 @@ SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"] RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6 # Install Pytorch -RUN pip install install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \ +RUN pip install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \ pip install torch-ort --no-dependencies