Skip to content

Test RAPIDS build times #27

Test RAPIDS build times

Test RAPIDS build times #27

# Manually dispatched workflow that measures RAPIDS build times.
name: Test RAPIDS build times

on:
  workflow_dispatch:
    inputs:
      # Branch handed to the clone commands in the build script
      # (`clone-all -b` and the UCX branch conversion).
      branch:
        type: string
        required: false
        default: main
      # Forwarded unchanged as the `node_type` input of the reusable
      # build workflow.
      node_type:
        type: string
        required: false
        default: cpu32
jobs:
  # Gate job: publishes the output `ok`, which is 'true' only when the run was
  # started by `workflow_dispatch` in the rapidsai/devcontainers repository and
  # 'false' otherwise. The build job's `if:` condition reads this output.
  check-event:
    name: Check GH Event
    runs-on: ubuntu-latest
    outputs:
      ok: ${{ steps.check_gh_event.outputs.ok }}
    steps:
      - id: check_gh_event
        name: Check GH Event
        shell: bash
        run: |
          # Use if/else rather than `A && B || C`: with the chained form, a
          # failure while writing "ok=true" would also append "ok=false".
          if [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.repository }}' == 'rapidsai/devcontainers' ]]; then
            ok=true
          else
            ok=false
          fi
          # tee echoes the value into the job log as well as the step output file.
          echo "ok=${ok}" | tee -a "$GITHUB_OUTPUT"
# Build job: calls the shared build-in-devcontainer reusable workflow once per
# matrix entry, and only when the check-event job reported ok == 'true'.
test-rapids-build-times:
  name: ${{ matrix.name }}
  needs: check-event
  if: needs.check-event.outputs.ok == 'true'
  uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@main
  secrets: inherit
  permissions:
    actions: read
    packages: read
    id-token: write
    contents: read
    pull-requests: read
  strategy:
    # Let the remaining matrix entries finish even if one of them fails.
    fail-fast: false
    matrix:
      include:
        # Variant that sets SCCACHE_NO_DIST_COMPILE=1 and limits
        # MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL to 2.
        - name: 'build cluster: no'
          env: |
            SCCACHE_NO_DIST_COMPILE=1
            MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=2
        # Variant that adds no extra environment variables.
        - name: 'build cluster: yes'
          env: ""
  with:
    # JSON lists are passed as strings to the reusable workflow.
    arch: '["amd64", "arm64"]'
    cuda: '["12.9", "13.1"]'
    node_type: ${{ inputs.node_type }}
    timeout-minutes: 720
    # sccache settings shared by every matrix entry:
    # 1. Prohibit sccache from shutting down automatically (SCCACHE_IDLE_TIMEOUT=0)
    # 2. Infinitely retry transient errors (SCCACHE_DIST_MAX_RETRIES=inf)
    # 3. Enable debug logging to track cache misses (SCCACHE_SERVER_LOG=sccache=debug)
    # 4. Never fallback to locally compiling (SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false)
    # The matrix entry's own variables are appended on the last line.
    env: |
      PYTHON_VERSION=3.13
      CONDA_ENV_CREATE_QUIET=1
      PARALLEL_LEVEL=0
      SCCACHE_IDLE_TIMEOUT=0
      SCCACHE_SERVER_LOG=sccache=debug
      SCCACHE_DIST_MAX_RETRIES=inf
      SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
      ${{ matrix.env }}
    # Bash script executed by the reusable workflow; its body follows.
    build_command: |
# Open a collapsible log group using the `::group::` workflow command.
#   $1 - group title (optional); wrapped in ANSI color code 34 (blue).
function begin_group() {
  local title="${1:-}"
  local color_blue="34"
  echo -e "::group::\e[${color_blue}m${title}\e[0m"
}
# Close the current log group and, if the grouped command failed, emit an
# `::error::` annotation naming the group.
#   $1 - group name shown in the error annotation (optional)
#   $2 - exit status of the grouped command (optional, defaults to 0)
function end_group() {
  local label="${1:-}"
  local rc="${2:-0}"
  local color_red="31"
  echo "::endgroup::"
  # Only a non-zero status produces the (red) failure annotation.
  if test "$rc" -ne 0; then
    echo -e "::error::\e[${color_red}m ${label} - Failed (⬆️ click above for full log ⬆️)\e[0m"
  fi
}
# Run a command inside its own log group and propagate its exit status.
#   $1   - group title
#   $2.. - the command and its arguments
# Prints the working directory and the shell-quoted command line before
# running it; the group is always closed, with a failure annotation when
# the command exits non-zero.
function run_command() {
  # `local -` makes the option changes below local to this function.
  local -
  set -euo pipefail
  local title="${1:-}"
  shift
  local -a cmd=("$@")
  local rc=0
  begin_group "$title"
  printf 'Working directory: %s\n' "$(pwd)"
  printf 'Running command: %s\n' "${cmd[*]@Q}"
  # `|| rc=$?` records the failure instead of letting `set -e` abort here,
  # so the group can still be closed.
  "${cmd[@]}" || rc=$?
  end_group "$title" "$rc"
  return "$rc"
}
# Convert a RAPIDS release branch name into the matching UCX release branch.
#
# Arguments:
#   $1 - repository name; only names starting with "ucx" are converted
#   $2 - branch name requested for the RAPIDS repositories
# Output:
#   Prints the branch to use. Non-ucx repositories, and branches that are not
#   exactly of the form release/YY.MM, are printed unchanged.
# Returns:
#   Non-zero, printing nothing on stdout, when the UCX version lookup fails
#   or returns an empty body.
convert_ucx_branch() {
  # `local -` makes the option changes below local to this function.
  local -
  set -euo pipefail
  local repo="$1"
  local custom_branch="$2"
  local normalized_branch="${custom_branch}"
  # Keep the intermediate values out of the caller's global scope.
  local rapids_version ucx_version

  # Only convert ucx* repositories on branches that match release/YY.MM
  if [[ "${repo}" == "ucx"* && "${custom_branch}" =~ ^release/[0-9]{2}\.[0-9]{2}$ ]]; then
    rapids_version="${custom_branch#release/}"
    # Get UCX version associated w/ RAPIDS version. `--fail` makes curl
    # return non-zero on HTTP errors instead of printing the error page,
    # which would otherwise end up inside the branch name.
    if ! ucx_version="$(curl -fsSL "https://version.gpuci.io/rapids/${rapids_version}")" \
      || [[ -z "${ucx_version}" ]]; then
      echo "convert_ucx_branch: could not resolve the UCX version for RAPIDS ${rapids_version}" >&2
      return 1
    fi
    normalized_branch="release/${ucx_version}"
  fi
  echo "${normalized_branch}"
}
# Setup phase: each step runs in its own log group via run_command, is timed
# with the `time` keyword, and executes in a child `bash -ceuo pipefail` so a
# failure inside the quoted script aborts that step.
#
# Quoting note for the double-quoted scripts below: unescaped expansions such
# as $(convert_ucx_branch ...), $(nproc) and ${PARALLEL_LEVEL} are evaluated by
# this outer script before the child bash starts, while backslash-escaped ones
# (\$RAPIDS_TO_UCXX_BRANCH, \${CLONE_ARGS[*]}) are evaluated by the child bash.
#
# NOTE(review): `${{ inputs.branch }}` appears to be pasted into the script
# text as-is, so a branch value containing quote characters would change the
# command — confirm that only trusted users can dispatch this workflow.
#
# Clone all the repos (stdout and stderr of both clone commands are discarded)
time run_command "Clone RAPIDS repositories" bash -ceuo pipefail "\
RAPIDS_TO_UCXX_BRANCH=\"$(convert_ucx_branch ucxx '${{ inputs.branch }}')\";
CLONE_ARGS=(-j$(nproc) -q -v --clone-upstream --depth 1 --single-branch --shallow-submodules --no-update-env);
clone-all -b \"${{ inputs.branch }}\" \${CLONE_ARGS[*]} >/dev/null 2>&1;
clone-ucxx -b \"\$RAPIDS_TO_UCXX_BRANCH\" \${CLONE_ARGS[*]} >/dev/null 2>&1;
" 2>&1
sleep 1
# Run rapids-post-start-command with its stdout discarded (per the group
# title, this creates the RAPIDS python environment).
time run_command "Create RAPIDS python environment" bash -ceuo pipefail "\
rapids-post-start-command >/dev/null" 2>&1
sleep 1
# Configure all the C++ libs (Ninja generator; tests and benchmarks enabled;
# stdout discarded, stderr kept in the log)
time run_command "Configure C++ libraries" bash -ceuo pipefail "\
configure-all \
-j${PARALLEL_LEVEL} \
-GNinja \
-Wno-dev \
-DBUILD_TESTS=ON \
-DBUILD_BENCHMARKS=ON \
-DBUILD_PRIMS_BENCH=ON \
-DBUILD_SHARED_LIBS=ON \
-DRAFT_COMPILE_LIBRARY=ON \
-DBUILD_CUGRAPH_MG_TESTS=ON \
>/dev/null" 2>&1
sleep 1
# Benchmark loop: build all C++ libraries three times, once per sccache
# setting listed below. Each pass restarts sccache with that single variable
# set, builds, reports statistics and timings, then cleans the build trees so
# the next pass starts from scratch.
# NOTE(review): presumably the SCCACHE_RECACHE=1 pass repopulates the cache
# and the following two passes compare the preprocessor cache mode off vs. on
# — confirm against the sccache documentation.
for ENVVAR in "SCCACHE_RECACHE=1" \
"SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=0" \
"SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=1" ; do
# Restart sccache with ${ENVVAR} set in its environment (all output discarded)
run_command "Start sccache (${ENVVAR})" bash -ceuo pipefail "\
${ENVVAR} devcontainer-utils-start-sccache --kill-all >/dev/null 2>&1"
# Build all the C++ libs with the same variable set; the whole step is timed
time run_command "Build C++ libraries (${ENVVAR})" bash -ceuo pipefail "\
${ENVVAR} build-all-cpp -j${PARALLEL_LEVEL} >/dev/null" 2>&1
sleep 1
# Print cache and dist stats
run_command "sccache stats (${ENVVAR})" \
sccache --show-adv-stats
# Print build times: grep the "real" line out of every build-*-time.log file
run_command "Build times (${ENVVAR})" bash -ceuo pipefail "\
find /var/log/devcontainer-utils/ -type f -name 'build-*-time.log' -print0 \
| xargs -0 -n1 grep -H real | sed 's/real\t/ /g' || : # Nonfatal if not found"
# Clean: delete the timing logs, then run `ninja clean` in every
# */cpp/build/latest symlink found up to four levels below the home directory.
# $(nproc) is expanded by this outer script before the child bash starts.
run_command "Clean (${ENVVAR})" bash -ceuo pipefail "\
find /var/log/devcontainer-utils/ -type f -name 'build-*-time.log' -delete >/dev/null 2>&1 || : # Nonfatal if not found
find ~/ -maxdepth 4 -type l -path '*/cpp/build/latest' -print0 | xargs -P$(nproc) -0 -n1 ninja clean -C >/dev/null 2>&1 || : # Nonfatal if not found"
done