From 5891465f3908c0e938e5501586a51dbdaecbfa6a Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 19 Jun 2024 22:22:50 +0000 Subject: [PATCH 01/47] Add build job for rocm --- .github/workflows/python-package.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 72e1b099a..78bc747c3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -103,6 +103,28 @@ jobs: name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }} path: output/* retention-days: 7 + build-shared-libs-rocm: + strategy: + matrix: + os: [ubuntu-latest] + arch: [x86_64] + runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents + steps: + - uses: actions/checkout@v4 + - name: Set up Docker multiarch + if: startsWith(matrix.os, 'ubuntu') + uses: docker/setup-qemu-action@v2 + - name: Clean up disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Build C++ + run: bash .github/scripts/build-rocm.sh + env: + build_os: ${{ matrix.os }} + build_arch: ${{ matrix.arch }} build-wheels: needs: - build-shared-libs From d03a680871b2d665ba9f420b7513cab35b0b6960 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 19 Jun 2024 22:23:08 +0000 Subject: [PATCH 02/47] Add rocm build script --- .github/scripts/build-rocm.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/scripts/build-rocm.sh diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh new file mode 100644 index 000000000..fc7515aa7 --- /dev/null +++ b/.github/scripts/build-rocm.sh @@ -0,0 +1,19 @@ +#!/bin/bash +declare build_arch +declare build_os + +set -xeuo pipefail +if [ "${build_os:0:6}" == ubuntu ]; then + 
image=rocm/dev-ubuntu-22.04:6.1-complete + echo "Using image $image" + docker run --rm --platform "linux/$build_arch" -i \ + -w /src -v "$PWD:/src" "$image" sh -c \ + "apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ + && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake --build ." +fi + +#output_dir="output/${build_os}/${build_arch}" +#mkdir -p "${output_dir}" +#(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") From ec9000f5444726589935ba8107249eddade9689d Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 20 Jun 2024 21:02:16 +0000 Subject: [PATCH 03/47] Copy shared obj file into output_dir --- .github/scripts/build-rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index fc7515aa7..616e8c250 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -14,6 +14,6 @@ if [ "${build_os:0:6}" == ubuntu ]; then && cmake --build ." 
fi -#output_dir="output/${build_os}/${build_arch}" -#mkdir -p "${output_dir}" -#(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") +output_dir="output/${build_os}/${build_arch}" +mkdir -p "${output_dir}" +(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") From 9b8c1da639c76c0fd41df00b835ab02b3508a64b Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 20 Jun 2024 21:02:50 +0000 Subject: [PATCH 04/47] upload build artifacts and enable wheels build --- .github/workflows/python-package.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 78bc747c3..3b243993b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -125,10 +125,17 @@ jobs: env: build_os: ${{ matrix.os }} build_arch: ${{ matrix.arch }} + - name: Upload build artifact + uses: actions/upload-artifact@v4 + with: + name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }} + path: output/* + retention-days: 7 build-wheels: needs: - build-shared-libs - build-shared-libs-cuda + - build-shared-libs-rocm strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] From 1413c5f3a2aed51140b86daa8ee9283c67cce738 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:10:54 -0500 Subject: [PATCH 05/47] Remove cuda build temporarily --- .github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3b243993b..0b0b35416 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -134,7 +134,6 @@ jobs: build-wheels: needs: - build-shared-libs - - build-shared-libs-cuda - build-shared-libs-rocm strategy: matrix: From fd655b02663d1f692734b1a6376421dfbe1064b9 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:00:16 +0000 
Subject: [PATCH 06/47] Add ROCm version to .so filename --- CMakeLists.txt | 2 ++ bitsandbytes/cextension.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bedefd51..c526678c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,9 @@ elseif(BUILD_HIP) # get hip version execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION) string(REGEX MATCH "[0-9]+\\.[0-9]+" HIP_VERSION "${HIP_CONFIG_VERSION}") + string(REPLACE "." "" HIP_VERSION_SHORT "${HIP_VERSION}") + string(APPEND BNB_OUTPUT_NAME "${HIP_VERSION_SHORT}") if(NO_CUBLASLT OR HIP_VERSION VERSION_LESS "6.1") string(APPEND BNB_OUTPUT_NAME "_nohipblaslt") endif() diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 03d2cbd61..6e391a752 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -38,9 +38,9 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: """ if torch.version.hip: if BNB_HIP_VERSION < 601: - return PACKAGE_DIR / f"libbitsandbytes_hip_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" else: - return PACKAGE_DIR / f"libbitsandbytes_hip{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}" if not cuda_specs.has_cublaslt: # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt @@ -119,8 +119,10 @@ def get_native_library() -> BNBNativeLibrary: if torch.version.hip: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor + BNB_HIP_VERSION_SHORT = str(hip_major) + str(hip_minor) else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 + BNB_HIP_VERSION_SHORT = "" lib = get_native_library() except Exception as e: lib = None From 
6b77f4c3f7aa042518d566489e13b774c96f68e3 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:16:57 +0000 Subject: [PATCH 07/47] Add rocm_version to whls build --- .github/scripts/build-rocm.sh | 3 ++- .github/workflows/python-package.yml | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 616e8c250..cc15210fd 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -1,10 +1,11 @@ #!/bin/bash declare build_arch declare build_os +declare rocm_version set -xeuo pipefail if [ "${build_os:0:6}" == ubuntu ]; then - image=rocm/dev-ubuntu-22.04:6.1-complete + image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" docker run --rm --platform "linux/$build_arch" -i \ -w /src -v "$PWD:/src" "$image" sh -c \ diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0b0b35416..cab735562 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -108,6 +108,8 @@ jobs: matrix: os: [ubuntu-latest] arch: [x86_64] + rocm_version: + ["6.1.2"] runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents steps: - uses: actions/checkout@v4 @@ -128,7 +130,7 @@ jobs: - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }} + name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }} path: output/* retention-days: 7 build-wheels: From 78324b32075b7ae6076c304e1dfd5f71db01704b Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:23:24 +0000 Subject: [PATCH 08/47] Revert "Remove cuda build temporarily" This reverts commit 1413c5f3a2aed51140b86daa8ee9283c67cce738. 
--- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cab735562..6a4a6205b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -136,6 +136,7 @@ jobs: build-wheels: needs: - build-shared-libs + - build-shared-libs-cuda - build-shared-libs-rocm strategy: matrix: From c146b8b8f2fe9d6fec5f1f1b8da25b1ec60d6ac6 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:36:41 +0000 Subject: [PATCH 09/47] Add rocm_version env var --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6a4a6205b..f4cc5486b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -127,6 +127,7 @@ jobs: env: build_os: ${{ matrix.os }} build_arch: ${{ matrix.arch }} + rocm_version: ${{ matrix.rocm_version }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: From d6c3df47a46d55c093e9bd8bf61ee3489bac605e Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 05:01:18 +0000 Subject: [PATCH 10/47] Remove thrust header files --- csrc/kernels.hip | 2 -- csrc/ops_hip.cuh | 6 ------ 2 files changed, 8 deletions(-) diff --git a/csrc/kernels.hip b/csrc/kernels.hip index ca77dceda..d8d7cdba5 100644 --- a/csrc/kernels.hip +++ b/csrc/kernels.hip @@ -10,8 +10,6 @@ #include #include -#include -#include //#include diff --git a/csrc/ops_hip.cuh b/csrc/ops_hip.cuh index 1b9c13063..e57cbb3b5 100644 --- a/csrc/ops_hip.cuh +++ b/csrc/ops_hip.cuh @@ -21,12 +21,6 @@ #include #include -/* -#include -#include -*/ - - #define CUDA_CHECK_RETURN(value) { \ hipError_t _m_cudaStat = value; \ if (_m_cudaStat != hipSuccess) { \ From 7e9a65c33f66fffcb14ee2438170718777c06022 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:44:49 +0000 Subject: 
[PATCH 11/47] Print node info --- .github/scripts/build-rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index cc15210fd..8aac2c04b 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -11,7 +11,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=hip . \ + && rocminfo && cmake -DCOMPUTE_BACKEND=hip . \ && cmake --build ." fi From cdb209a2eb896d9c4166f53e9b2aa580c10e42c0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:51:34 +0000 Subject: [PATCH 12/47] print cuda node info --- .github/scripts/build-cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 0f9b8d726..fc79a92f6 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -15,7 +15,7 @@ for NO_CUBLASLT in ON OFF; do docker run --platform "linux/$build_arch" -i -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ + && nvidia-smi && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ && cmake --build ." else pip install cmake==3.28.3 From 77e149917dd4bb5be87099289edf53421fef6fe8 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:56:11 +0000 Subject: [PATCH 13/47] Revert "print cuda node info" This reverts commit cdb209a2eb896d9c4166f53e9b2aa580c10e42c0. 
--- .github/scripts/build-cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index fc79a92f6..0f9b8d726 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -15,7 +15,7 @@ for NO_CUBLASLT in ON OFF; do docker run --platform "linux/$build_arch" -i -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && nvidia-smi && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ + && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ && cmake --build ." else pip install cmake==3.28.3 From 7c9190990478d3980eece86909b5faf4d36b3e16 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:56:54 +0000 Subject: [PATCH 14/47] Revert "Print node info" This reverts commit 7e9a65c33f66fffcb14ee2438170718777c06022. --- .github/scripts/build-rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 8aac2c04b..cc15210fd 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -11,7 +11,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && rocminfo && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake -DCOMPUTE_BACKEND=hip . \ && cmake --build ." 
fi From b78b3400b59e2f40dad5ba4f676e7ffd46dff978 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 07:14:18 +0000 Subject: [PATCH 15/47] Add rocm arch to compile command --- .github/scripts/build-rocm.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index cc15210fd..a5933da3f 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -4,6 +4,7 @@ declare build_os declare rocm_version set -xeuo pipefail +bnb_rocm_arch="gfx906;gfx908;gfx90a;gfx942;gfx1100;gfx1030" if [ "${build_os:0:6}" == ubuntu ]; then image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" @@ -11,7 +12,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \ && cmake --build ." 
fi From a62b9d454b25f5f7933e7793fd553cc81699a925 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:21:47 +0000 Subject: [PATCH 16/47] Rename .so files to rocm --- CMakeLists.txt | 2 +- bitsandbytes/cextension.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a7671239..0891f75b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,7 +194,7 @@ elseif(BUILD_HIP) list(APPEND SRC_FILES ${HIP_FILES}) - string(APPEND BNB_OUTPUT_NAME "_hip") + string(APPEND BNB_OUTPUT_NAME "_rocm") # get hip version execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 6e391a752..a096d0d51 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -38,9 +38,9 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: """ if torch.version.hip: if BNB_HIP_VERSION < 601: - return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" else: - return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}" if not cuda_specs.has_cublaslt: # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt From 9059bff540eac1b871eca220120c37a32186d481 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:27:12 +0000 Subject: [PATCH 17/47] Update default gpu arch --- .github/scripts/build-rocm.sh | 2 +- CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index a5933da3f..b508fac69 100644 --- a/.github/scripts/build-rocm.sh +++ 
b/.github/scripts/build-rocm.sh @@ -4,7 +4,7 @@ declare build_os declare rocm_version set -xeuo pipefail -bnb_rocm_arch="gfx906;gfx908;gfx90a;gfx942;gfx1100;gfx1030" +bnb_rocm_arch="gfx90a;gfx942;gfx1100" if [ "${build_os:0:6}" == ubuntu ]; then image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" diff --git a/CMakeLists.txt b/CMakeLists.txt index 0891f75b0..eac72fe52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,7 +185,7 @@ elseif(BUILD_HIP) set(CMAKE_HIP_ARCHITECTURES ${BNB_ROCM_ARCH}) else() if (NOT AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) - set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx940;gfx941;gfx942") + set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx942;gfx1100") elseif (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) endif() From c5a406ad9aed7738a236c9c184a4b1bf2ccd422c Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:48:32 +0000 Subject: [PATCH 18/47] Skip cpu based igemmlt int tests on ROCm --- tests/test_functional.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_functional.py b/tests/test_functional.py index 4e82c530a..a9d926b89 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -584,6 +584,9 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans @pytest.mark.parametrize("ldb", (0,), ids=id_formatter("ldb")) @pytest.mark.parametrize("device", ("cuda", "cpu"), ids=id_formatter("device")) def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb, device): + if HIP_ENVIRONMENT and device == "cpu": + pytest.skip("this test is not supported on ROCm yet") + for i in range(k): if dims == 2: A = torch.randint(-128, 127, size=(dim1, dim3), device=device).to(torch.int8) From 9cbb5e12a8987c57188917e5353e46492ef8d1eb Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:19:53 +0000 Subject: [PATCH 19/47] Update Documentation --- docs/source/installation.mdx | 18 
++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index f917f2623..0daa5e279 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -146,13 +146,23 @@ Please follow these steps to install bitsandbytes with device-specific backend s bitsandbytes is fully supported from ROCm 6.1 onwards (currently in alpha release). > [!TIP] -> If you already installed ROCm and PyTorch, skip Docker steps below and please check that the torch version matches your ROCm install. To install torch for a specific ROCm version, please refer to step 3 of wheels install in [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) guide. +> If you would like to install ROCm and PyTorch on bare metal, skip Docker steps and refer to our official guides at [ROCm installation overview](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/install-overview.html#rocm-install-overview) and [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) (Step 3 of wheels build for quick installation). Please make sure to get PyTorch wheel for the installed ROCm version. ```bash -# Create a docker container with latest pytorch. 
It comes with ROCm and pytorch preinstalled -docker pull rocm/pytorch:latest -docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/pytorch:latest +# Create a docker container with latest ROCm image, which includes ROCm libraries +docker pull rocm/dev-ubuntu-22.04:6.1.2-complete +docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ubuntu-22.04:6.1.2-complete +apt-get update && apt-get install -y git && cd home +# Install pytorch compatible with above ROCm version +pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ + +# Install bitsandbytes from PyPI +# (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 +# Please install from source as given below if your configuration doesn't match with these) +pip install bitsandbytes + +# Install bitsandbytes from source # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch git clone --depth 1 -b multi-backend-refactor https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/ From 358062473b3ec475ecf14dd7effc2b02754ff947 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:23:07 +0000 Subject: [PATCH 20/47] Update upstream repo name --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 0daa5e279..a71fe3261 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -164,7 +164,7 @@ pip install bitsandbytes # Install bitsandbytes from source # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch -git clone --depth 1 -b multi-backend-refactor https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/ +git clone --depth 1 -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ # Install dependencies 
pip install -r requirements-dev.txt From 3bde1b7bc3b52163d8f35bf654e933879213992e Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:28:39 +0000 Subject: [PATCH 21/47] Update docs --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index a71fe3261..3ed694ac1 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -159,7 +159,7 @@ pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/roc # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 -# Please install from source as given below if your configuration doesn't match with these) +# Please install from source if your configuration doesn't match with these) pip install bitsandbytes # Install bitsandbytes from source From db1df723a9782f2a8b6080ebffdb806fa2c44365 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:11:53 -0500 Subject: [PATCH 22/47] Update string format Co-authored-by: Aarni Koskela --- bitsandbytes/cextension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index a096d0d51..cfeaf4f44 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -119,7 +119,7 @@ def get_native_library() -> BNBNativeLibrary: if torch.version.hip: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor - BNB_HIP_VERSION_SHORT = str(hip_major) + str(hip_minor) + BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" From e498b4d09ba2f4328572ee22a27f94377359a4b2 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Wed, 31 Jul 
2024 16:25:40 -0500 Subject: [PATCH 23/47] Remove pre-release option for torch install --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 3ed694ac1..e92981a3b 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -155,7 +155,7 @@ docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ub apt-get update && apt-get install -y git && cd home # Install pytorch compatible with above ROCm version -pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ +pip install torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 From 7d2e02705c8489795a3c115fda42fcb6de9200b0 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:43:22 -0500 Subject: [PATCH 24/47] Update pytorch install path Co-authored-by: Titus <9048635+Titus-von-Koeller@users.noreply.github.com> --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index e92981a3b..0e8da0cda 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -155,7 +155,7 @@ docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ub apt-get update && apt-get install -y git && cd home # Install pytorch compatible with above ROCm version -pip install torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ +pip install torch --index-url https://download.pytorch.org/whl/rocm6.1/ # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 From 0c76b1cdadf4d393e2f71b5dc0554b389f5fa803 Mon Sep 17 00:00:00 
2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 18:59:19 +0000 Subject: [PATCH 25/47] Add messages for Heuristics error --- csrc/ops.hip | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 157e84629..5c0688b91 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -576,6 +576,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; + printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); } else { @@ -614,18 +615,26 @@ template int igemmlt(hipblasLtHandl heuristicResult, &returnedAlgoCount)); - if(!SCALE_ROWS) + if (returnedAlgoCount == 0) { - float alpha = 1.0f, beta = 0.0f; - - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + has_error = 1; + printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); } else { - //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); - float beta = 0.0f; - - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + if(!SCALE_ROWS) + { + float alpha = 1.0f, beta = 0.0f; + + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + } + else + { + //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); + float beta = 0.0f; + + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + } } } @@ -635,7 +644,7 @@ template 
int igemmlt(hipblasLtHandl if (Adesc) has_error |= checkHipblasStatus(hipblasLtMatrixLayoutDestroy(Adesc)); if (matmulDesc) has_error |= checkHipblasStatus(hipblasLtMatmulDescDestroy(matmulDesc)); if(has_error == 1) - printf("error detected"); + printf("error detected\n"); return has_error; #endif // NO_HIPBLASLT From 714d9e9adb41c5983ae62105fb355186c54255d3 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:10:10 +0000 Subject: [PATCH 26/47] Remove toolcache for disk space --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 91e6d82a6..1c0488401 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -120,6 +120,7 @@ jobs: sudo rm -rf /opt/ghc sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /opt/hostedtoolcache - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From ce77361934721642ab1dc2f6751074cac027762f Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:19:29 +0000 Subject: [PATCH 27/47] print disk usage --- .github/workflows/python-package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1c0488401..210f22a9f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -116,11 +116,13 @@ jobs: uses: docker/setup-qemu-action@v2 - name: Clean up disk space run: | + sudo df -h sudo rm -rf /usr/share/dotnet sudo rm -rf /opt/ghc sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache + sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From b87c2b93a7065ae6d893d36695e5e798aee79663 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:33:55 +0000 Subject: [PATCH 28/47] Clean disk space for linux --- 
.github/workflows/python-package.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 210f22a9f..68b45affe 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -123,6 +123,11 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache sudo df -h + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo rm -rf /swapfile + apt-get clean + sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From 828fdc6e34095da83f2a9174fd2a1ef6ed8386e9 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:38:29 +0000 Subject: [PATCH 29/47] Fix for ubuntu --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 68b45affe..5d3baa6ee 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -123,6 +123,7 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache sudo df -h + - name: Clean up disk space Ubuntu if: startsWith(matrix.os, 'ubuntu') run: | sudo rm -rf /swapfile From 5721601db924739f456a6d4df0386bc9fc8e6eda Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:40:09 +0000 Subject: [PATCH 30/47] Add sudo for apt clean --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5d3baa6ee..1aece2298 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -127,7 +127,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') run: | sudo rm -rf /swapfile - apt-get clean + sudo apt-get clean sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh From d58303f921a3555644ac2a4ce2e197d757123726 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 
16 Aug 2024 20:56:30 +0000 Subject: [PATCH 31/47] Update clean up disk list --- .github/workflows/python-package.yml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1aece2298..f784446e6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -117,17 +117,23 @@ jobs: - name: Clean up disk space run: | sudo df -h - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf /opt/hostedtoolcache - sudo df -h - - name: Clean up disk space Ubuntu - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo rm -rf /swapfile - sudo apt-get clean + sudo rm -rf \ + /usr/share/dotnet \ + /opt/ghc \ + "/usr/local/share/boost" \ + "$AGENT_TOOLSDIRECTORY" \ + /opt/hostedtoolcache \ + /opt/google/chrome \ + /opt/microsoft/msedge \ + /opt/microsoft/powershell \ + /opt/pipx \ + /usr/lib/mono \ + /usr/local/julia* \ + /usr/local/lib/android \ + /usr/local/lib/node_modules \ + /usr/local/share/chromium \ + /usr/local/share/powershell \ + /usr/share/swift sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh From 483e8ca48da03bd9a619d2d7f54db200a4c08748 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 21:04:34 +0000 Subject: [PATCH 32/47] remove disk usage print --- .github/workflows/python-package.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f784446e6..d2da82501 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -116,7 +116,6 @@ jobs: uses: docker/setup-qemu-action@v2 - name: Clean up disk space run: | - sudo df -h sudo rm -rf \ /usr/share/dotnet \ /opt/ghc \ @@ -134,7 +133,6 @@ jobs: /usr/local/share/chromium \ /usr/local/share/powershell \ /usr/share/swift - sudo df -h 
- name: Build C++ run: bash .github/scripts/build-rocm.sh env: From 52ba52eee1681ea878ec82511a46b07c3ef05ae0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sat, 24 Aug 2024 23:25:40 +0000 Subject: [PATCH 33/47] Add BNB_BACKEND variable --- bitsandbytes/cextension.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index cfeaf4f44..063931a8a 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -99,7 +99,7 @@ def get_native_library() -> BNBNativeLibrary: if cuda_binary_path.exists(): binary_path = cuda_binary_path else: - logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path) + logger.warning(f"Could not find the bitsandbytes {BNB_BACKEND} binary at {cuda_binary_path}") logger.debug(f"Loading bitsandbytes native library from: {binary_path}") dll = ct.cdll.LoadLibrary(str(binary_path)) @@ -120,21 +120,23 @@ def get_native_library() -> BNBNativeLibrary: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" + BNB_BACKEND = "ROCM" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" + BNB_BACKEND = "CUDA" lib = get_native_library() except Exception as e: lib = None logger.error(f"Could not load bitsandbytes native library: {e}", exc_info=True) if torch.cuda.is_available(): logger.warning( - """ -CUDA Setup failed despite CUDA being available. Please run the following command to get more information: + f""" +{BNB_BACKEND} Setup failed despite {BNB_BACKEND} being available. Please run the following command to get more information: python -m bitsandbytes -Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them +Inspect the output of the command and see if you can locate {BNB_BACKEND} libraries. 
You might need to add them to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues """, From 755dfbe61a6d4a2238ee60d3f2a42f62d2ad2163 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sat, 24 Aug 2024 23:26:50 +0000 Subject: [PATCH 34/47] Update diagnostic functions for ROCm --- bitsandbytes/diagnostics/cuda.py | 120 +++++++++++++++++++++---------- bitsandbytes/diagnostics/main.py | 23 +++--- 2 files changed, 98 insertions(+), 45 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 8974c6400..6679c49cc 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -6,6 +6,7 @@ import torch from bitsandbytes.cextension import get_cuda_bnb_library_path +from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -38,6 +39,9 @@ "nvcuda*.dll", # Windows ) +if HIP_ENVIRONMENT: + CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*") + logger = logging.getLogger(__name__) @@ -105,37 +109,63 @@ def find_cudart_libraries() -> Iterator[Path]: def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: - print( - f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " - f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", - ) + if not HIP_ENVIRONMENT: + print( + f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " + f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", + ) + else: + print( + f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}" + ) + binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): - print_dedented( - f""" - Library not found: {binary_path}. Maybe you need to compile it from source? 
- If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, - for example, `make CUDA_VERSION=113`. + if not HIP_ENVIRONMENT: + print_dedented( + f""" + Library not found: {binary_path}. Maybe you need to compile it from source? + If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, + for example, `make CUDA_VERSION=113`. + + The CUDA version for the compile might depend on your conda install, if using conda. + Inspect CUDA version via `conda list | grep cuda`. + """, + ) + else: + print_dedented( + f""" + Library not found: {binary_path}. + Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION + in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version + and rebuild bitsandbytes. + """, + ) - The CUDA version for the compile might depend on your conda install, if using conda. - Inspect CUDA version via `conda list | grep cuda`. - """, - ) cuda_major, cuda_minor = cuda_specs.cuda_version_tuple - if cuda_major < 11: - print_dedented( - """ - WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). - You will be only to use 8-bit optimizers and quantization routines! - """, - ) + if not HIP_ENVIRONMENT: + if cuda_major < 11: + print_dedented( + """ + WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). + You will be only to use 8-bit optimizers and quantization routines! + """, + ) + + print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") + else: + if (cuda_major, cuda_minor) < (6, 1): + print_dedented( + """ + WARNING: bitandbytes is fully supported only from ROCm 6.1. 
+ """, + ) - print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") # 7.5 is the minimum CC for cublaslt - if not cuda_specs.has_cublaslt: + if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: print_dedented( """ WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! @@ -152,25 +182,41 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: def print_cuda_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.") + print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.") elif len(cudart_paths) > 1: + backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip print_dedented( f""" - Found duplicate CUDA runtime files (see below). + Found duplicate {BNB_BACKEND} runtime files (see below). - We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, - but this might mismatch with the CUDA version that is needed for bitsandbytes. - To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. - - For example, if you want to use the CUDA version 122, - BNB_CUDA_VERSION=122 python ... - - OR set the environmental variable in your .bashrc: - export BNB_CUDA_VERSION=122 - - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version}, + but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes. """, ) + if not HIP_ENVIRONMENT: + print_dedented( + f""" + To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. + + For example, if you want to use the CUDA version 122, + BNB_CUDA_VERSION=122 python ... 
+ + OR set the environmental variable in your .bashrc: + export BNB_CUDA_VERSION=122 + + In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + """, + ) + else: + print_dedented( + f""" + To resolve it, install PyTorch built for the ROCm version you want to use + + and set LD_LIBRARY_PATH to your ROCm install path, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2, + """, + ) + for pth in cudart_paths: - print(f"* Found CUDA runtime at: {pth}") + print(f"* Found {BNB_BACKEND} runtime at: {pth}") diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index 1ce096f69..ff4d2fd2a 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -11,17 +11,19 @@ ) from bitsandbytes.diagnostics.utils import print_dedented, print_header +from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND def sanity_check(): from bitsandbytes.cextension import lib if lib is None: + compute_backend = "cuda" if not HIP_ENVIRONMENT else "hip" print_dedented( - """ + f""" Couldn't load the bitsandbytes library, likely due to missing binaries. Please ensure bitsandbytes is properly installed. - For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND=cuda -S .`. + For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND={compute_backend} -S .`. See the documentation for more details if needed. Trying a simple check anyway, but this will likely fail... 
@@ -49,19 +51,24 @@ def main(): print_header("OTHER") cuda_specs = get_cuda_specs() - print("CUDA specs:", cuda_specs) + if HIP_ENVIRONMENT: + rocm_specs = f" rocm_version_string=\'{cuda_specs.cuda_version_string}\'," + rocm_specs+= f" rocm_version_tuple={cuda_specs.cuda_version_tuple}" + print(f"{BNB_BACKEND} specs:{rocm_specs}") + else: + print(f"{BNB_BACKEND} specs:{cuda_specs}") if not torch.cuda.is_available(): - print("Torch says CUDA is not available. Possible reasons:") - print("1. CUDA driver not installed") - print("2. CUDA not installed") - print("3. You have multiple conflicting CUDA libraries") + print(f"Torch says {BNB_BACKEND} is not available. Possible reasons:") + print(f"1. {BNB_BACKEND} driver not installed") + print(f"2. {BNB_BACKEND} not installed") + print(f"3. You have multiple conflicting {BNB_BACKEND} libraries") if cuda_specs: print_cuda_diagnostics(cuda_specs) print_cuda_runtime_diagnostics() print_header("") print_header("DEBUG INFO END") print_header("") - print("Checking that the library is importable and CUDA is callable...") + print(f"Checking that the library is importable and {BNB_BACKEND} is callable...") try: sanity_check() print("SUCCESS!") From 70c3d6bfd0906eb99d71edc3b8cbf3301c8846d5 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:17:42 +0000 Subject: [PATCH 35/47] Fix tuple error --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 6679c49cc..c89a10e9c 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -40,7 +40,7 @@ ) if HIP_ENVIRONMENT: - CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*") + CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*",) logger = logging.getLogger(__name__) From 7b038e9c9f55d274532449ddd4d9e158bf92eecf Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:44:39 +0000 Subject: [PATCH 36/47] Fix library 
detection bug for recursive and symlink cases --- bitsandbytes/diagnostics/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index c89a10e9c..515349787 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -60,8 +60,8 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path except OSError: # Assume an esoteric error trying to poke at the directory pass for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: - for pth in dir.glob(lib_pattern): - if pth.is_file(): + for pth in dir.rglob(lib_pattern): + if pth.is_file() and not pth.is_symlink(): yield pth except (OSError, PermissionError): pass From 343c9fae1386f921c06f14c44a2c65334958eaa0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:47:43 +0000 Subject: [PATCH 37/47] fix pre-commit errors --- bitsandbytes/diagnostics/cuda.py | 14 ++++---------- bitsandbytes/diagnostics/main.py | 6 +++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 515349787..6c66c6219 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -5,8 +5,7 @@ import torch -from bitsandbytes.cextension import get_cuda_bnb_library_path -from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND +from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT, get_cuda_bnb_library_path from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -115,10 +114,7 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", ) else: - print( - f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}" - ) - + print(f"PyTorch settings found: 
ROCM_VERSION={cuda_specs.cuda_version_string}") binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): @@ -143,7 +139,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: """, ) - cuda_major, cuda_minor = cuda_specs.cuda_version_tuple if not HIP_ENVIRONMENT: if cuda_major < 11: @@ -163,7 +158,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: """, ) - # 7.5 is the minimum CC for cublaslt if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: print_dedented( @@ -195,7 +189,7 @@ def print_cuda_runtime_diagnostics() -> None: ) if not HIP_ENVIRONMENT: print_dedented( - f""" + """ To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. For example, if you want to use the CUDA version 122, @@ -210,7 +204,7 @@ def print_cuda_runtime_diagnostics() -> None: ) else: print_dedented( - f""" + """ To resolve it, install PyTorch built for the ROCm version you want to use and set LD_LIBRARY_PATH to your ROCm install path, e.g. diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index ff4d2fd2a..9165cbeed 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -3,6 +3,7 @@ import torch +from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT from bitsandbytes.consts import PACKAGE_GITHUB_URL from bitsandbytes.cuda_specs import get_cuda_specs from bitsandbytes.diagnostics.cuda import ( @@ -11,7 +12,6 @@ ) from bitsandbytes.diagnostics.utils import print_dedented, print_header -from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND def sanity_check(): from bitsandbytes.cextension import lib @@ -52,8 +52,8 @@ def main(): print_header("OTHER") cuda_specs = get_cuda_specs() if HIP_ENVIRONMENT: - rocm_specs = f" rocm_version_string=\'{cuda_specs.cuda_version_string}\'," - rocm_specs+= f" rocm_version_tuple={cuda_specs.cuda_version_tuple}" + rocm_specs = f" rocm_version_string='{cuda_specs.cuda_version_string}'," + rocm_specs += f" 
rocm_version_tuple={cuda_specs.cuda_version_tuple}" print(f"{BNB_BACKEND} specs:{rocm_specs}") else: print(f"{BNB_BACKEND} specs:{cuda_specs}") From f2ea1379a02baf91fbff158a560f5705e40734f6 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 11 Sep 2024 19:10:05 +0000 Subject: [PATCH 38/47] Remove recursive path lib search --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 6c66c6219..63f129819 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -59,7 +59,7 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path except OSError: # Assume an esoteric error trying to poke at the directory pass for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: - for pth in dir.rglob(lib_pattern): + for pth in dir.glob(lib_pattern): if pth.is_file() and not pth.is_symlink(): yield pth except (OSError, PermissionError): From ee6abedf79ef143324e2ceef9b1a7220d6ec5ceb Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 11 Sep 2024 19:24:55 +0000 Subject: [PATCH 39/47] Create function for runtime lib patterns --- bitsandbytes/diagnostics/cuda.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 63f129819..038235d59 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -32,17 +32,18 @@ "_", # current Python interpreter } -CUDA_RUNTIME_LIB_PATTERNS = ( - "cudart64*.dll", # Windows - "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. 
- "nvcuda*.dll", # Windows -) - -if HIP_ENVIRONMENT: - CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*",) - logger = logging.getLogger(__name__) +def get_runtime_lib_patterns() -> tuple: + if HIP_ENVIRONMENT: + return ("libamdhip64.so*",) + else: + return ( + "cudart64*.dll", # Windows + "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. + "nvcuda*.dll", # Windows + ) + def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]: for dir_string in paths_list_candidate.split(os.pathsep): @@ -58,7 +59,7 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path continue except OSError: # Assume an esoteric error trying to poke at the directory pass - for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: + for lib_pattern in get_runtime_lib_patterns(): for pth in dir.glob(lib_pattern): if pth.is_file() and not pth.is_symlink(): yield pth From 6f9cd268e7bde88ac5f5776c124156ac29409882 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:54:28 -0500 Subject: [PATCH 40/47] Update logger format Co-authored-by: Aarni Koskela --- bitsandbytes/cextension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index c8bb382b6..3d08c9084 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -99,7 +99,7 @@ def get_native_library() -> BNBNativeLibrary: if cuda_binary_path.exists(): binary_path = cuda_binary_path else: - logger.warning(f"Could not find the bitsandbytes {BNB_BACKEND} binary at {cuda_binary_path}") + logger.warning("Could not find the bitsandbytes %s binary at %r", BNB_BACKEND, cuda_binary_path) logger.debug(f"Loading bitsandbytes native library from: {binary_path}") dll = ct.cdll.LoadLibrary(str(binary_path)) From 570137ca2e6ec995ebc8eddebd94f55bed581a64 Mon Sep 17 00:00:00 2001 From: pnunna93 
<104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:57:20 -0500 Subject: [PATCH 41/47] Update error reporting Co-authored-by: Aarni Koskela --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 5c0688b91..25c42863e 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -576,7 +576,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; - printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); + fprintf(stderr, "Error: Matmul Algo Heuristic didn't return algorithms\n"); } else { From 3380df4f17d3c11d17f3bfc5cfb7b8a5c519085b Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:58:02 -0500 Subject: [PATCH 42/47] Remove commented code Co-authored-by: Aarni Koskela --- csrc/ops.hip | 1 - 1 file changed, 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 25c42863e..70e5fdee7 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -630,7 +630,6 @@ template int igemmlt(hipblasLtHandl } else { - //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); float beta = 0.0f; has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); From 1c5bd4ff8bab2103bc0c6579586fb969c8be13f0 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:58:31 -0500 Subject: [PATCH 43/47] Update error reporting Co-authored-by: Aarni Koskela --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 70e5fdee7..a808d5ecb 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -643,7 +643,7 @@ template int igemmlt(hipblasLtHandl if (Adesc) has_error |= checkHipblasStatus(hipblasLtMatrixLayoutDestroy(Adesc)); 
if (matmulDesc) has_error |= checkHipblasStatus(hipblasLtMatmulDescDestroy(matmulDesc)); if(has_error == 1) - printf("error detected\n"); + fprintf(stderr, "error detected\n"); return has_error; #endif // NO_HIPBLASLT From f39ff4869a8b9f76963b9017af669aa10000c376 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 19:03:17 +0000 Subject: [PATCH 44/47] Update error reporting --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index a808d5ecb..4fdc3cbfa 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -618,7 +618,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; - printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); + fprintf(stderr, "Error: Matmul Algo Heuristic didn't return algorithms\n"); } else { From f57addd0fe846bfca081058b5157b8abef172d56 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 19:08:06 +0000 Subject: [PATCH 45/47] Create hip diagnostics functions --- bitsandbytes/cextension.py | 2 +- bitsandbytes/diagnostics/cuda.py | 177 +++++++++++++++++-------------- bitsandbytes/diagnostics/main.py | 8 +- 3 files changed, 105 insertions(+), 82 deletions(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 3d08c9084..e322693b5 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -120,7 +120,7 @@ def get_native_library() -> BNBNativeLibrary: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" - BNB_BACKEND = "ROCM" + BNB_BACKEND = "ROCm" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 038235d59..a6edc3814 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -108,59 +108,38 @@ def 
find_cudart_libraries() -> Iterator[Path]: yield from find_cuda_libraries_in_path_list(value) -def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: - if not HIP_ENVIRONMENT: - print( - f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " - f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", - ) - else: - print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}") +def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: + print( + f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " + f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", + ) binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): - if not HIP_ENVIRONMENT: - print_dedented( - f""" - Library not found: {binary_path}. Maybe you need to compile it from source? - If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, - for example, `make CUDA_VERSION=113`. - - The CUDA version for the compile might depend on your conda install, if using conda. - Inspect CUDA version via `conda list | grep cuda`. - """, - ) - else: - print_dedented( - f""" - Library not found: {binary_path}. - Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION - in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version - and rebuild bitsandbytes. - """, - ) + print_dedented( + f""" + Library not found: {binary_path}. Maybe you need to compile it from source? + If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, + for example, `make CUDA_VERSION=113`. + + The CUDA version for the compile might depend on your conda install, if using conda. + Inspect CUDA version via `conda list | grep cuda`. 
+ """, + ) cuda_major, cuda_minor = cuda_specs.cuda_version_tuple - if not HIP_ENVIRONMENT: - if cuda_major < 11: - print_dedented( - """ - WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). - You will be only to use 8-bit optimizers and quantization routines! - """, - ) - - print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") - else: - if (cuda_major, cuda_minor) < (6, 1): - print_dedented( - """ - WARNING: bitandbytes is fully supported only from ROCm 6.1. - """, - ) + if cuda_major < 11: + print_dedented( + """ + WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). + You will be only to use 8-bit optimizers and quantization routines! + """, + ) + + print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") # 7.5 is the minimum CC for cublaslt - if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: + if not cuda_specs.has_cublaslt: print_dedented( """ WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! @@ -174,44 +153,88 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: # (2) Multiple CUDA versions installed -def print_cuda_runtime_diagnostics() -> None: +def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: + print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}") + + binary_path = get_cuda_bnb_library_path(cuda_specs) + if not binary_path.exists(): + print_dedented( + f""" + Library not found: {binary_path}. + Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION + in PyTorch Settings matches your ROCm install. If not, reinstall PyTorch for your ROCm version + and rebuild bitsandbytes. + """, + ) + + hip_major, hip_minor = cuda_specs.cuda_version_tuple + if (hip_major, hip_minor) < (6, 1): + print_dedented( + """ + WARNING: bitsandbytes is fully supported only from ROCm 6.1. 
+ """, + ) + + +def print_diagnostics(cuda_specs: CUDASpecs)) -> None: + if HIP_ENVIRONMENT: + _print_hip_diagnostics(cuda_specs) + else: + _print_cuda_diagnostics(cuda_specs) + + +def _print_cuda_runtime_diagnostics() -> None: + cudart_paths = list(find_cudart_libraries()) + if not cudart_paths: + print("WARNING! CUDA runtime files not found in any environmental path.") + elif len(cudart_paths) > 1: + print_dedented( + f""" + Found duplicate CUDA runtime files (see below). + + We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, + but this might mismatch with the CUDA version that is needed for bitsandbytes. + To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. + + For example, if you want to use the CUDA version 122, + BNB_CUDA_VERSION=122 python ... + + OR set the environmental variable in your .bashrc: + export BNB_CUDA_VERSION=122 + + In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + """, + ) + for pth in cudart_paths: + print(f"* Found CUDA runtime at: {pth}") + + +def _print_hip_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.") + print(f"WARNING! ROCm runtime files not found in any environmental path.") elif len(cudart_paths) > 1: - backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip print_dedented( f""" - Found duplicate {BNB_BACKEND} runtime files (see below). + Found duplicate ROCm runtime files (see below). - We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version}, - but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes. + We select the PyTorch default ROCm runtime, which is {torch.version.hip}, + but this might mismatch with the ROCm version that is needed for bitsandbytes. 
+ + To resolve it, install PyTorch built for the ROCm version you want to use + + and set LD_LIBRARY_PATH to your ROCm install path, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-6.1.2/lib, """, ) - if not HIP_ENVIRONMENT: - print_dedented( - """ - To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. - - For example, if you want to use the CUDA version 122, - BNB_CUDA_VERSION=122 python ... - - OR set the environmental variable in your .bashrc: - export BNB_CUDA_VERSION=122 - - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, - """, - ) - else: - print_dedented( - """ - To resolve it, install PyTorch built for the ROCm version you want to use - - and set LD_LIBRARY_PATH to your ROCm install path, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2, - """, - ) for pth in cudart_paths: - print(f"* Found {BNB_BACKEND} runtime at: {pth}") + print(f"* Found ROCm runtime at: {pth}") + + +def print_runtime_diagnostics() -> None: + if HIP_ENVIRONMENT: + _print_hip_runtime_diagnostics() + else: + _print_cuda_runtime_diagnostics() diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index 9165cbeed..8dc43ed2a 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -7,8 +7,8 @@ from bitsandbytes.consts import PACKAGE_GITHUB_URL from bitsandbytes.cuda_specs import get_cuda_specs from bitsandbytes.diagnostics.cuda import ( - print_cuda_diagnostics, - print_cuda_runtime_diagnostics, + print_diagnostics, + print_runtime_diagnostics, ) from bitsandbytes.diagnostics.utils import print_dedented, print_header @@ -63,8 +63,8 @@ def main(): print(f"2. {BNB_BACKEND} not installed") print(f"3. 
You have multiple conflicting {BNB_BACKEND} libraries") if cuda_specs: - print_cuda_diagnostics(cuda_specs) - print_cuda_runtime_diagnostics() + print_diagnostics(cuda_specs) + print_runtime_diagnostics() print_header("") print_header("DEBUG INFO END") print_header("") From 251a0e87690cb6bd0a9df16b3b6a0392d4fd7f0d Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 20:03:05 +0000 Subject: [PATCH 46/47] Fix Typo --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index a6edc3814..91b70f3b1 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -176,7 +176,7 @@ def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: ) -def print_diagnostics(cuda_specs: CUDASpecs)) -> None: +def print_diagnostics(cuda_specs: CUDASpecs) -> None: if HIP_ENVIRONMENT: _print_hip_diagnostics(cuda_specs) else: From 260a3ac8da54a4c3519172f0194152db22116829 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 22:01:49 +0000 Subject: [PATCH 47/47] Fix pre-commit checks --- bitsandbytes/diagnostics/cuda.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 91b70f3b1..014b753a9 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -5,7 +5,7 @@ import torch -from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT, get_cuda_bnb_library_path +from bitsandbytes.cextension import HIP_ENVIRONMENT, get_cuda_bnb_library_path from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -34,15 +34,16 @@ logger = logging.getLogger(__name__) + def get_runtime_lib_patterns() -> tuple: if HIP_ENVIRONMENT: return ("libamdhip64.so*",) else: return ( - "cudart64*.dll", # Windows - 
"libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. - "nvcuda*.dll", # Windows - ) + "cudart64*.dll", # Windows + "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. + "nvcuda*.dll", # Windows + ) def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]: @@ -213,7 +214,7 @@ def _print_cuda_runtime_diagnostics() -> None: def _print_hip_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print(f"WARNING! ROCm runtime files not found in any environmental path.") + print("WARNING! ROCm runtime files not found in any environmental path.") elif len(cudart_paths) > 1: print_dedented( f"""