From 5891465f3908c0e938e5501586a51dbdaecbfa6a Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 19 Jun 2024 22:22:50 +0000 Subject: [PATCH 01/54] Add build job for rocm --- .github/workflows/python-package.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 72e1b099a..78bc747c3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -103,6 +103,28 @@ jobs: name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }} path: output/* retention-days: 7 + build-shared-libs-rocm: + strategy: + matrix: + os: [ubuntu-latest] + arch: [x86_64] + runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents + steps: + - uses: actions/checkout@v4 + - name: Set up Docker multiarch + if: startsWith(matrix.os, 'ubuntu') + uses: docker/setup-qemu-action@v2 + - name: Clean up disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Build C++ + run: bash .github/scripts/build-rocm.sh + env: + build_os: ${{ matrix.os }} + build_arch: ${{ matrix.arch }} build-wheels: needs: - build-shared-libs From d03a680871b2d665ba9f420b7513cab35b0b6960 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 19 Jun 2024 22:23:08 +0000 Subject: [PATCH 02/54] Add rocm build script --- .github/scripts/build-rocm.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/scripts/build-rocm.sh diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh new file mode 100644 index 000000000..fc7515aa7 --- /dev/null +++ b/.github/scripts/build-rocm.sh @@ -0,0 +1,19 @@ +#!/bin/bash +declare build_arch +declare build_os + +set -xeuo pipefail +if [ "${build_os:0:6}" == ubuntu ]; then + 
image=rocm/dev-ubuntu-22.04:6.1-complete + echo "Using image $image" + docker run --rm --platform "linux/$build_arch" -i \ + -w /src -v "$PWD:/src" "$image" sh -c \ + "apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ + && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake --build ." +fi + +#output_dir="output/${build_os}/${build_arch}" +#mkdir -p "${output_dir}" +#(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") From ec9000f5444726589935ba8107249eddade9689d Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 20 Jun 2024 21:02:16 +0000 Subject: [PATCH 03/54] Copy shared obj file into output_dir --- .github/scripts/build-rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index fc7515aa7..616e8c250 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -14,6 +14,6 @@ if [ "${build_os:0:6}" == ubuntu ]; then && cmake --build ." 
fi -#output_dir="output/${build_os}/${build_arch}" -#mkdir -p "${output_dir}" -#(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") +output_dir="output/${build_os}/${build_arch}" +mkdir -p "${output_dir}" +(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") From 9b8c1da639c76c0fd41df00b835ab02b3508a64b Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 20 Jun 2024 21:02:50 +0000 Subject: [PATCH 04/54] upload build artifacts and enable wheels build --- .github/workflows/python-package.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 78bc747c3..3b243993b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -125,10 +125,17 @@ jobs: env: build_os: ${{ matrix.os }} build_arch: ${{ matrix.arch }} + - name: Upload build artifact + uses: actions/upload-artifact@v4 + with: + name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }} + path: output/* + retention-days: 7 build-wheels: needs: - build-shared-libs - build-shared-libs-cuda + - build-shared-libs-rocm strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] From 1413c5f3a2aed51140b86daa8ee9283c67cce738 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:10:54 -0500 Subject: [PATCH 05/54] Remove cuda build temporarily --- .github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3b243993b..0b0b35416 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -134,7 +134,6 @@ jobs: build-wheels: needs: - build-shared-libs - - build-shared-libs-cuda - build-shared-libs-rocm strategy: matrix: From fd655b02663d1f692734b1a6376421dfbe1064b9 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:00:16 +0000 
Subject: [PATCH 06/54] Add ROCm version to .so filename --- CMakeLists.txt | 2 ++ bitsandbytes/cextension.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bedefd51..c526678c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,9 @@ elseif(BUILD_HIP) # get hip version execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION) string(REGEX MATCH "[0-9]+\\.[0-9]+" HIP_VERSION "${HIP_CONFIG_VERSION}") + string(REPLACE "." "" HIP_VERSION_SHORT "${HIP_VERSION}") + string(APPEND BNB_OUTPUT_NAME "${HIP_VERSION_SHORT}") if(NO_CUBLASLT OR HIP_VERSION VERSION_LESS "6.1") string(APPEND BNB_OUTPUT_NAME "_nohipblaslt") endif() diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 03d2cbd61..6e391a752 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -38,9 +38,9 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: """ if torch.version.hip: if BNB_HIP_VERSION < 601: - return PACKAGE_DIR / f"libbitsandbytes_hip_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" else: - return PACKAGE_DIR / f"libbitsandbytes_hip{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}" if not cuda_specs.has_cublaslt: # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt @@ -119,8 +119,10 @@ def get_native_library() -> BNBNativeLibrary: if torch.version.hip: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor + BNB_HIP_VERSION_SHORT = str(hip_major) + str(hip_minor) else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 + BNB_HIP_VERSION_SHORT = "" lib = get_native_library() except Exception as e: lib = None From 
6b77f4c3f7aa042518d566489e13b774c96f68e3 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:16:57 +0000 Subject: [PATCH 07/54] Add rocm_version to whls build --- .github/scripts/build-rocm.sh | 3 ++- .github/workflows/python-package.yml | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 616e8c250..cc15210fd 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -1,10 +1,11 @@ #!/bin/bash declare build_arch declare build_os +declare rocm_version set -xeuo pipefail if [ "${build_os:0:6}" == ubuntu ]; then - image=rocm/dev-ubuntu-22.04:6.1-complete + image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" docker run --rm --platform "linux/$build_arch" -i \ -w /src -v "$PWD:/src" "$image" sh -c \ diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0b0b35416..cab735562 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -108,6 +108,8 @@ jobs: matrix: os: [ubuntu-latest] arch: [x86_64] + rocm_version: + ["6.1.2"] runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents steps: - uses: actions/checkout@v4 @@ -128,7 +130,7 @@ jobs: - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }} + name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }} path: output/* retention-days: 7 build-wheels: From 78324b32075b7ae6076c304e1dfd5f71db01704b Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:23:24 +0000 Subject: [PATCH 08/54] Revert "Remove cuda build temporarily" This reverts commit 1413c5f3a2aed51140b86daa8ee9283c67cce738. 
--- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cab735562..6a4a6205b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -136,6 +136,7 @@ jobs: build-wheels: needs: - build-shared-libs + - build-shared-libs-cuda - build-shared-libs-rocm strategy: matrix: From c146b8b8f2fe9d6fec5f1f1b8da25b1ec60d6ac6 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 29 Jul 2024 21:36:41 +0000 Subject: [PATCH 09/54] Add rocm_version env var --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6a4a6205b..f4cc5486b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -127,6 +127,7 @@ jobs: env: build_os: ${{ matrix.os }} build_arch: ${{ matrix.arch }} + rocm_version: ${{ matrix.rocm_version }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: From d6c3df47a46d55c093e9bd8bf61ee3489bac605e Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 05:01:18 +0000 Subject: [PATCH 10/54] Remove thrust header files --- csrc/kernels.hip | 2 -- csrc/ops_hip.cuh | 6 ------ 2 files changed, 8 deletions(-) diff --git a/csrc/kernels.hip b/csrc/kernels.hip index ca77dceda..d8d7cdba5 100644 --- a/csrc/kernels.hip +++ b/csrc/kernels.hip @@ -10,8 +10,6 @@ #include #include -#include -#include //#include diff --git a/csrc/ops_hip.cuh b/csrc/ops_hip.cuh index 1b9c13063..e57cbb3b5 100644 --- a/csrc/ops_hip.cuh +++ b/csrc/ops_hip.cuh @@ -21,12 +21,6 @@ #include #include -/* -#include -#include -*/ - - #define CUDA_CHECK_RETURN(value) { \ hipError_t _m_cudaStat = value; \ if (_m_cudaStat != hipSuccess) { \ From 7e9a65c33f66fffcb14ee2438170718777c06022 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:44:49 +0000 Subject: 
[PATCH 11/54] Print node info --- .github/scripts/build-rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index cc15210fd..8aac2c04b 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -11,7 +11,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=hip . \ + && rocminfo && cmake -DCOMPUTE_BACKEND=hip . \ && cmake --build ." fi From cdb209a2eb896d9c4166f53e9b2aa580c10e42c0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:51:34 +0000 Subject: [PATCH 12/54] print cuda node info --- .github/scripts/build-cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 0f9b8d726..fc79a92f6 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -15,7 +15,7 @@ for NO_CUBLASLT in ON OFF; do docker run --platform "linux/$build_arch" -i -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ + && nvidia-smi && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ && cmake --build ." else pip install cmake==3.28.3 From 77e149917dd4bb5be87099289edf53421fef6fe8 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:56:11 +0000 Subject: [PATCH 13/54] Revert "print cuda node info" This reverts commit cdb209a2eb896d9c4166f53e9b2aa580c10e42c0. 
--- .github/scripts/build-cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index fc79a92f6..0f9b8d726 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -15,7 +15,7 @@ for NO_CUBLASLT in ON OFF; do docker run --platform "linux/$build_arch" -i -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && nvidia-smi && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ + && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \ && cmake --build ." else pip install cmake==3.28.3 From 7c9190990478d3980eece86909b5faf4d36b3e16 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 06:56:54 +0000 Subject: [PATCH 14/54] Revert "Print node info" This reverts commit 7e9a65c33f66fffcb14ee2438170718777c06022. --- .github/scripts/build-rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 8aac2c04b..cc15210fd 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -11,7 +11,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && rocminfo && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake -DCOMPUTE_BACKEND=hip . \ && cmake --build ." 
fi From b78b3400b59e2f40dad5ba4f676e7ffd46dff978 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 07:14:18 +0000 Subject: [PATCH 15/54] Add rocm arch to compile command --- .github/scripts/build-rocm.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index cc15210fd..a5933da3f 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -4,6 +4,7 @@ declare build_os declare rocm_version set -xeuo pipefail +bnb_rocm_arch="gfx906;gfx908;gfx90a;gfx942;gfx1100;gfx1030" if [ "${build_os:0:6}" == ubuntu ]; then image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" @@ -11,7 +12,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=hip . \ + && cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \ && cmake --build ." 
fi From a62b9d454b25f5f7933e7793fd553cc81699a925 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:21:47 +0000 Subject: [PATCH 16/54] Rename .so files to rocm --- CMakeLists.txt | 2 +- bitsandbytes/cextension.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a7671239..0891f75b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,7 +194,7 @@ elseif(BUILD_HIP) list(APPEND SRC_FILES ${HIP_FILES}) - string(APPEND BNB_OUTPUT_NAME "_hip") + string(APPEND BNB_OUTPUT_NAME "_rocm") # get hip version execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 6e391a752..a096d0d51 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -38,9 +38,9 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: """ if torch.version.hip: if BNB_HIP_VERSION < 601: - return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}" else: - return PACKAGE_DIR / f"libbitsandbytes_hip{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" + return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}" library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}" if not cuda_specs.has_cublaslt: # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt From 9059bff540eac1b871eca220120c37a32186d481 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:27:12 +0000 Subject: [PATCH 17/54] Update default gpu arch --- .github/scripts/build-rocm.sh | 2 +- CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index a5933da3f..b508fac69 100644 --- a/.github/scripts/build-rocm.sh +++ 
b/.github/scripts/build-rocm.sh @@ -4,7 +4,7 @@ declare build_os declare rocm_version set -xeuo pipefail -bnb_rocm_arch="gfx906;gfx908;gfx90a;gfx942;gfx1100;gfx1030" +bnb_rocm_arch="gfx90a;gfx942;gfx1100" if [ "${build_os:0:6}" == ubuntu ]; then image=rocm/dev-ubuntu-22.04:${rocm_version}-complete echo "Using image $image" diff --git a/CMakeLists.txt b/CMakeLists.txt index 0891f75b0..eac72fe52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,7 +185,7 @@ elseif(BUILD_HIP) set(CMAKE_HIP_ARCHITECTURES ${BNB_ROCM_ARCH}) else() if (NOT AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) - set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx940;gfx941;gfx942") + set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx942;gfx1100") elseif (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) endif() From c5a406ad9aed7738a236c9c184a4b1bf2ccd422c Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 19:48:32 +0000 Subject: [PATCH 18/54] Skip cpu based igemmlt int tests on ROCm --- tests/test_functional.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_functional.py b/tests/test_functional.py index 4e82c530a..a9d926b89 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -584,6 +584,9 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans @pytest.mark.parametrize("ldb", (0,), ids=id_formatter("ldb")) @pytest.mark.parametrize("device", ("cuda", "cpu"), ids=id_formatter("device")) def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb, device): + if HIP_ENVIRONMENT and device == "cpu": + pytest.skip("this test is not supported on ROCm yet") + for i in range(k): if dims == 2: A = torch.randint(-128, 127, size=(dim1, dim3), device=device).to(torch.int8) From 9cbb5e12a8987c57188917e5353e46492ef8d1eb Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:19:53 +0000 Subject: [PATCH 19/54] Update Documentation --- docs/source/installation.mdx | 18 
++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index f917f2623..0daa5e279 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -146,13 +146,23 @@ Please follow these steps to install bitsandbytes with device-specific backend s bitsandbytes is fully supported from ROCm 6.1 onwards (currently in alpha release). > [!TIP] -> If you already installed ROCm and PyTorch, skip Docker steps below and please check that the torch version matches your ROCm install. To install torch for a specific ROCm version, please refer to step 3 of wheels install in [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) guide. +> If you would like to install ROCm and PyTorch on bare metal, skip Docker steps and refer to our official guides at [ROCm installation overview](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/install-overview.html#rocm-install-overview) and [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) (Step 3 of wheels build for quick installation). Please make sure to get PyTorch wheel for the installed ROCm version. ```bash -# Create a docker container with latest pytorch. 
It comes with ROCm and pytorch preinstalled -docker pull rocm/pytorch:latest -docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/pytorch:latest +# Create a docker container with latest ROCm image, which includes ROCm libraries +docker pull rocm/dev-ubuntu-22.04:6.1.2-complete +docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ubuntu-22.04:6.1.2-complete +apt-get update && apt-get install -y git && cd home +# Install pytorch compatible with above ROCm version +pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ + +# Install bitsandbytes from PyPI +# (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 +# Please install from source as given below if your configuration doesn't match with these) +pip install bitsandbytes + +# Install bitsandbytes from source # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch git clone --depth 1 -b multi-backend-refactor https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/ From 358062473b3ec475ecf14dd7effc2b02754ff947 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:23:07 +0000 Subject: [PATCH 20/54] Update upstream repo name --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 0daa5e279..a71fe3261 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -164,7 +164,7 @@ pip install bitsandbytes # Install bitsandbytes from source # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch -git clone --depth 1 -b multi-backend-refactor https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/ +git clone --depth 1 -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ # Install dependencies 
pip install -r requirements-dev.txt From 3bde1b7bc3b52163d8f35bf654e933879213992e Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Tue, 30 Jul 2024 21:28:39 +0000 Subject: [PATCH 21/54] Update docs --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index a71fe3261..3ed694ac1 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -159,7 +159,7 @@ pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/roc # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 -# Please install from source as given below if your configuration doesn't match with these) +# Please install from source if your configuration doesn't match with these) pip install bitsandbytes # Install bitsandbytes from source From db1df723a9782f2a8b6080ebffdb806fa2c44365 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:11:53 -0500 Subject: [PATCH 22/54] Update string format Co-authored-by: Aarni Koskela --- bitsandbytes/cextension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index a096d0d51..cfeaf4f44 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -119,7 +119,7 @@ def get_native_library() -> BNBNativeLibrary: if torch.version.hip: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor - BNB_HIP_VERSION_SHORT = str(hip_major) + str(hip_minor) + BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" From e498b4d09ba2f4328572ee22a27f94377359a4b2 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Wed, 31 Jul 
2024 16:25:40 -0500 Subject: [PATCH 23/54] Remove pre-release option for torch install --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 3ed694ac1..e92981a3b 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -155,7 +155,7 @@ docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ub apt-get update && apt-get install -y git && cd home # Install pytorch compatible with above ROCm version -pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ +pip install torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 From 7d2e02705c8489795a3c115fda42fcb6de9200b0 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:43:22 -0500 Subject: [PATCH 24/54] Update pytorch install path Co-authored-by: Titus <9048635+Titus-von-Koeller@users.noreply.github.com> --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index e92981a3b..0e8da0cda 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -155,7 +155,7 @@ docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ub apt-get update && apt-get install -y git && cd home # Install pytorch compatible with above ROCm version -pip install torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/ +pip install torch --index-url https://download.pytorch.org/whl/rocm6.1/ # Install bitsandbytes from PyPI # (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 From 0c76b1cdadf4d393e2f71b5dc0554b389f5fa803 Mon Sep 17 00:00:00 
2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 18:59:19 +0000 Subject: [PATCH 25/54] Add messages for Heuristics error --- csrc/ops.hip | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 157e84629..5c0688b91 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -576,6 +576,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; + printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); } else { @@ -614,18 +615,26 @@ template int igemmlt(hipblasLtHandl heuristicResult, &returnedAlgoCount)); - if(!SCALE_ROWS) + if (returnedAlgoCount == 0) { - float alpha = 1.0f, beta = 0.0f; - - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + has_error = 1; + printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); } else { - //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); - float beta = 0.0f; - - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + if(!SCALE_ROWS) + { + float alpha = 1.0f, beta = 0.0f; + + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + } + else + { + //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); + float beta = 0.0f; + + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + } } } @@ -635,7 +644,7 @@ template 
int igemmlt(hipblasLtHandl if (Adesc) has_error |= checkHipblasStatus(hipblasLtMatrixLayoutDestroy(Adesc)); if (matmulDesc) has_error |= checkHipblasStatus(hipblasLtMatmulDescDestroy(matmulDesc)); if(has_error == 1) - printf("error detected"); + printf("error detected\n"); return has_error; #endif // NO_HIPBLASLT From 714d9e9adb41c5983ae62105fb355186c54255d3 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:10:10 +0000 Subject: [PATCH 26/54] Remove toolcache for disk space --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 91e6d82a6..1c0488401 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -120,6 +120,7 @@ jobs: sudo rm -rf /opt/ghc sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /opt/hostedtoolcache - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From ce77361934721642ab1dc2f6751074cac027762f Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:19:29 +0000 Subject: [PATCH 27/54] print disk usage --- .github/workflows/python-package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1c0488401..210f22a9f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -116,11 +116,13 @@ jobs: uses: docker/setup-qemu-action@v2 - name: Clean up disk space run: | + sudo df -h sudo rm -rf /usr/share/dotnet sudo rm -rf /opt/ghc sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache + sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From b87c2b93a7065ae6d893d36695e5e798aee79663 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:33:55 +0000 Subject: [PATCH 28/54] Clean disk space for linux --- 
.github/workflows/python-package.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 210f22a9f..68b45affe 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -123,6 +123,11 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache sudo df -h + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo rm -rf /swapfile + apt-get clean + sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From 828fdc6e34095da83f2a9174fd2a1ef6ed8386e9 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:38:29 +0000 Subject: [PATCH 29/54] Fix for ubuntu --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 68b45affe..5d3baa6ee 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -123,6 +123,7 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /opt/hostedtoolcache sudo df -h + - name: Clean up disk space Ubuntu if: startsWith(matrix.os, 'ubuntu') run: | sudo rm -rf /swapfile From 5721601db924739f456a6d4df0386bc9fc8e6eda Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 20:40:09 +0000 Subject: [PATCH 30/54] Add sudo for apt clean --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5d3baa6ee..1aece2298 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -127,7 +127,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') run: | sudo rm -rf /swapfile - apt-get clean + sudo apt-get clean sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh From d58303f921a3555644ac2a4ce2e197d757123726 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 
16 Aug 2024 20:56:30 +0000 Subject: [PATCH 31/54] Update clean up disk list --- .github/workflows/python-package.yml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1aece2298..f784446e6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -117,17 +117,23 @@ jobs: - name: Clean up disk space run: | sudo df -h - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf /opt/hostedtoolcache - sudo df -h - - name: Clean up disk space Ubuntu - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo rm -rf /swapfile - sudo apt-get clean + sudo rm -rf \ + /usr/share/dotnet \ + /opt/ghc \ + "/usr/local/share/boost" \ + "$AGENT_TOOLSDIRECTORY" \ + /opt/hostedtoolcache \ + /opt/google/chrome \ + /opt/microsoft/msedge \ + /opt/microsoft/powershell \ + /opt/pipx \ + /usr/lib/mono \ + /usr/local/julia* \ + /usr/local/lib/android \ + /usr/local/lib/node_modules \ + /usr/local/share/chromium \ + /usr/local/share/powershell \ + /usr/share/swift sudo df -h - name: Build C++ run: bash .github/scripts/build-rocm.sh From 483e8ca48da03bd9a619d2d7f54db200a4c08748 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 16 Aug 2024 21:04:34 +0000 Subject: [PATCH 32/54] remove disk usage print --- .github/workflows/python-package.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f784446e6..d2da82501 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -116,7 +116,6 @@ jobs: uses: docker/setup-qemu-action@v2 - name: Clean up disk space run: | - sudo df -h sudo rm -rf \ /usr/share/dotnet \ /opt/ghc \ @@ -134,7 +133,6 @@ jobs: /usr/local/share/chromium \ /usr/local/share/powershell \ /usr/share/swift - sudo df -h 
- name: Build C++ run: bash .github/scripts/build-rocm.sh env: From 52ba52eee1681ea878ec82511a46b07c3ef05ae0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sat, 24 Aug 2024 23:25:40 +0000 Subject: [PATCH 33/54] Add BNB_BACKEND variable --- bitsandbytes/cextension.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index cfeaf4f44..063931a8a 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -99,7 +99,7 @@ def get_native_library() -> BNBNativeLibrary: if cuda_binary_path.exists(): binary_path = cuda_binary_path else: - logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path) + logger.warning(f"Could not find the bitsandbytes {BNB_BACKEND} binary at {cuda_binary_path}") logger.debug(f"Loading bitsandbytes native library from: {binary_path}") dll = ct.cdll.LoadLibrary(str(binary_path)) @@ -120,21 +120,23 @@ def get_native_library() -> BNBNativeLibrary: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" + BNB_BACKEND = "ROCM" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" + BNB_BACKEND = "CUDA" lib = get_native_library() except Exception as e: lib = None logger.error(f"Could not load bitsandbytes native library: {e}", exc_info=True) if torch.cuda.is_available(): logger.warning( - """ -CUDA Setup failed despite CUDA being available. Please run the following command to get more information: + f""" +{BNB_BACKEND} Setup failed despite {BNB_BACKEND} being available. Please run the following command to get more information: python -m bitsandbytes -Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them +Inspect the output of the command and see if you can locate {BNB_BACKEND} libraries. 
You might need to add them to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues """, From 755dfbe61a6d4a2238ee60d3f2a42f62d2ad2163 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sat, 24 Aug 2024 23:26:50 +0000 Subject: [PATCH 34/54] Update diagnostic functions for ROCm --- bitsandbytes/diagnostics/cuda.py | 120 +++++++++++++++++++++---------- bitsandbytes/diagnostics/main.py | 23 +++--- 2 files changed, 98 insertions(+), 45 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 8974c6400..6679c49cc 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -6,6 +6,7 @@ import torch from bitsandbytes.cextension import get_cuda_bnb_library_path +from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -38,6 +39,9 @@ "nvcuda*.dll", # Windows ) +if HIP_ENVIRONMENT: + CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*") + logger = logging.getLogger(__name__) @@ -105,37 +109,63 @@ def find_cudart_libraries() -> Iterator[Path]: def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: - print( - f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " - f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", - ) + if not HIP_ENVIRONMENT: + print( + f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " + f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", + ) + else: + print( + f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}" + ) + binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): - print_dedented( - f""" - Library not found: {binary_path}. Maybe you need to compile it from source? 
- If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, - for example, `make CUDA_VERSION=113`. + if not HIP_ENVIRONMENT: + print_dedented( + f""" + Library not found: {binary_path}. Maybe you need to compile it from source? + If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, + for example, `make CUDA_VERSION=113`. + + The CUDA version for the compile might depend on your conda install, if using conda. + Inspect CUDA version via `conda list | grep cuda`. + """, + ) + else: + print_dedented( + f""" + Library not found: {binary_path}. + Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION + in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version + and rebuild bitsandbytes. + """, + ) - The CUDA version for the compile might depend on your conda install, if using conda. - Inspect CUDA version via `conda list | grep cuda`. - """, - ) cuda_major, cuda_minor = cuda_specs.cuda_version_tuple - if cuda_major < 11: - print_dedented( - """ - WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). - You will be only to use 8-bit optimizers and quantization routines! - """, - ) + if not HIP_ENVIRONMENT: + if cuda_major < 11: + print_dedented( + """ + WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). + You will be only to use 8-bit optimizers and quantization routines! + """, + ) + + print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") + else: + if (cuda_major, cuda_minor) < (6, 1): + print_dedented( + """ + WARNING: bitandbytes is fully supported only from ROCm 6.1. 
+ """, + ) - print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") # 7.5 is the minimum CC for cublaslt - if not cuda_specs.has_cublaslt: + if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: print_dedented( """ WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! @@ -152,25 +182,41 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: def print_cuda_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.") + print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.") elif len(cudart_paths) > 1: + backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip print_dedented( f""" - Found duplicate CUDA runtime files (see below). + Found duplicate {BNB_BACKEND} runtime files (see below). - We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, - but this might mismatch with the CUDA version that is needed for bitsandbytes. - To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. - - For example, if you want to use the CUDA version 122, - BNB_CUDA_VERSION=122 python ... - - OR set the environmental variable in your .bashrc: - export BNB_CUDA_VERSION=122 - - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version}, + but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes. """, ) + if not HIP_ENVIRONMENT: + print_dedented( + f""" + To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. + + For example, if you want to use the CUDA version 122, + BNB_CUDA_VERSION=122 python ... 
+ + OR set the environmental variable in your .bashrc: + export BNB_CUDA_VERSION=122 + + In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + """, + ) + else: + print_dedented( + f""" + To resolve it, install PyTorch built for the ROCm version you want to use + + and set LD_LIBRARY_PATH to your ROCm install path, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2, + """, + ) + for pth in cudart_paths: - print(f"* Found CUDA runtime at: {pth}") + print(f"* Found {BNB_BACKEND} runtime at: {pth}") diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index 1ce096f69..ff4d2fd2a 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -11,17 +11,19 @@ ) from bitsandbytes.diagnostics.utils import print_dedented, print_header +from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND def sanity_check(): from bitsandbytes.cextension import lib if lib is None: + compute_backend = "cuda" if not HIP_ENVIRONMENT else "hip" print_dedented( - """ + f""" Couldn't load the bitsandbytes library, likely due to missing binaries. Please ensure bitsandbytes is properly installed. - For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND=cuda -S .`. + For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND={compute_backend} -S .`. See the documentation for more details if needed. Trying a simple check anyway, but this will likely fail... 
@@ -49,19 +51,24 @@ def main(): print_header("OTHER") cuda_specs = get_cuda_specs() - print("CUDA specs:", cuda_specs) + if HIP_ENVIRONMENT: + rocm_specs = f" rocm_version_string=\'{cuda_specs.cuda_version_string}\'," + rocm_specs+= f" rocm_version_tuple={cuda_specs.cuda_version_tuple}" + print(f"{BNB_BACKEND} specs:{rocm_specs}") + else: + print(f"{BNB_BACKEND} specs:{cuda_specs}") if not torch.cuda.is_available(): - print("Torch says CUDA is not available. Possible reasons:") - print("1. CUDA driver not installed") - print("2. CUDA not installed") - print("3. You have multiple conflicting CUDA libraries") + print(f"Torch says {BNB_BACKEND} is not available. Possible reasons:") + print(f"1. {BNB_BACKEND} driver not installed") + print(f"2. {BNB_BACKEND} not installed") + print(f"3. You have multiple conflicting {BNB_BACKEND} libraries") if cuda_specs: print_cuda_diagnostics(cuda_specs) print_cuda_runtime_diagnostics() print_header("") print_header("DEBUG INFO END") print_header("") - print("Checking that the library is importable and CUDA is callable...") + print(f"Checking that the library is importable and {BNB_BACKEND} is callable...") try: sanity_check() print("SUCCESS!") From 70c3d6bfd0906eb99d71edc3b8cbf3301c8846d5 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:17:42 +0000 Subject: [PATCH 35/54] Fix tuple error --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 6679c49cc..c89a10e9c 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -40,7 +40,7 @@ ) if HIP_ENVIRONMENT: - CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*") + CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*",) logger = logging.getLogger(__name__) From 7b038e9c9f55d274532449ddd4d9e158bf92eecf Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:44:39 +0000 Subject: [PATCH 36/54] Fix library 
detection bug for recursive and symlink cases --- bitsandbytes/diagnostics/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index c89a10e9c..515349787 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -60,8 +60,8 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path except OSError: # Assume an esoteric error trying to poke at the directory pass for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: - for pth in dir.glob(lib_pattern): - if pth.is_file(): + for pth in dir.rglob(lib_pattern): + if pth.is_file() and not pth.is_symlink(): yield pth except (OSError, PermissionError): pass From 343c9fae1386f921c06f14c44a2c65334958eaa0 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Sun, 25 Aug 2024 01:47:43 +0000 Subject: [PATCH 37/54] fix pre-commit errors --- bitsandbytes/diagnostics/cuda.py | 14 ++++---------- bitsandbytes/diagnostics/main.py | 6 +++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 515349787..6c66c6219 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -5,8 +5,7 @@ import torch -from bitsandbytes.cextension import get_cuda_bnb_library_path -from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND +from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT, get_cuda_bnb_library_path from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -115,10 +114,7 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", ) else: - print( - f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}" - ) - + print(f"PyTorch settings found: 
ROCM_VERSION={cuda_specs.cuda_version_string}") binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): @@ -143,7 +139,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: """, ) - cuda_major, cuda_minor = cuda_specs.cuda_version_tuple if not HIP_ENVIRONMENT: if cuda_major < 11: @@ -163,7 +158,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: """, ) - # 7.5 is the minimum CC for cublaslt if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: print_dedented( @@ -195,7 +189,7 @@ def print_cuda_runtime_diagnostics() -> None: ) if not HIP_ENVIRONMENT: print_dedented( - f""" + """ To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. For example, if you want to use the CUDA version 122, @@ -210,7 +204,7 @@ def print_cuda_runtime_diagnostics() -> None: ) else: print_dedented( - f""" + """ To resolve it, install PyTorch built for the ROCm version you want to use and set LD_LIBRARY_PATH to your ROCm install path, e.g. diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index ff4d2fd2a..9165cbeed 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -3,6 +3,7 @@ import torch +from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT from bitsandbytes.consts import PACKAGE_GITHUB_URL from bitsandbytes.cuda_specs import get_cuda_specs from bitsandbytes.diagnostics.cuda import ( @@ -11,7 +12,6 @@ ) from bitsandbytes.diagnostics.utils import print_dedented, print_header -from bitsandbytes.cextension import HIP_ENVIRONMENT, BNB_BACKEND def sanity_check(): from bitsandbytes.cextension import lib @@ -52,8 +52,8 @@ def main(): print_header("OTHER") cuda_specs = get_cuda_specs() if HIP_ENVIRONMENT: - rocm_specs = f" rocm_version_string=\'{cuda_specs.cuda_version_string}\'," - rocm_specs+= f" rocm_version_tuple={cuda_specs.cuda_version_tuple}" + rocm_specs = f" rocm_version_string='{cuda_specs.cuda_version_string}'," + rocm_specs += f" 
rocm_version_tuple={cuda_specs.cuda_version_tuple}" print(f"{BNB_BACKEND} specs:{rocm_specs}") else: print(f"{BNB_BACKEND} specs:{cuda_specs}") From f2ea1379a02baf91fbff158a560f5705e40734f6 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 11 Sep 2024 19:10:05 +0000 Subject: [PATCH 38/54] Remove recursive path lib search --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 6c66c6219..63f129819 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -59,7 +59,7 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path except OSError: # Assume an esoteric error trying to poke at the directory pass for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: - for pth in dir.rglob(lib_pattern): + for pth in dir.glob(lib_pattern): if pth.is_file() and not pth.is_symlink(): yield pth except (OSError, PermissionError): From ee6abedf79ef143324e2ceef9b1a7220d6ec5ceb Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 11 Sep 2024 19:24:55 +0000 Subject: [PATCH 39/54] Create function for runtime lib patterns --- bitsandbytes/diagnostics/cuda.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 63f129819..038235d59 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -32,17 +32,18 @@ "_", # current Python interpreter } -CUDA_RUNTIME_LIB_PATTERNS = ( - "cudart64*.dll", # Windows - "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. 
- "nvcuda*.dll", # Windows -) - -if HIP_ENVIRONMENT: - CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*",) - logger = logging.getLogger(__name__) +def get_runtime_lib_patterns() -> tuple: + if HIP_ENVIRONMENT: + return ("libamdhip64.so*",) + else: + return ( + "cudart64*.dll", # Windows + "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. + "nvcuda*.dll", # Windows + ) + def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]: for dir_string in paths_list_candidate.split(os.pathsep): @@ -58,7 +59,7 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path continue except OSError: # Assume an esoteric error trying to poke at the directory pass - for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS: + for lib_pattern in get_runtime_lib_patterns(): for pth in dir.glob(lib_pattern): if pth.is_file() and not pth.is_symlink(): yield pth From 6f9cd268e7bde88ac5f5776c124156ac29409882 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:54:28 -0500 Subject: [PATCH 40/54] Update logger format Co-authored-by: Aarni Koskela --- bitsandbytes/cextension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index c8bb382b6..3d08c9084 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -99,7 +99,7 @@ def get_native_library() -> BNBNativeLibrary: if cuda_binary_path.exists(): binary_path = cuda_binary_path else: - logger.warning(f"Could not find the bitsandbytes {BNB_BACKEND} binary at {cuda_binary_path}") + logger.warning("Could not find the bitsandbytes %s binary at %r", BNB_BACKEND, cuda_binary_path) logger.debug(f"Loading bitsandbytes native library from: {binary_path}") dll = ct.cdll.LoadLibrary(str(binary_path)) From 570137ca2e6ec995ebc8eddebd94f55bed581a64 Mon Sep 17 00:00:00 2001 From: pnunna93 
<104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:57:20 -0500 Subject: [PATCH 41/54] Update error reporting Co-authored-by: Aarni Koskela --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 5c0688b91..25c42863e 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -576,7 +576,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; - printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); + fprintf(stderr, "Error: Matmul Algo Heuristic didn't return algorithms\n"); } else { From 3380df4f17d3c11d17f3bfc5cfb7b8a5c519085b Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:58:02 -0500 Subject: [PATCH 42/54] Remove commented code Co-authored-by: Aarni Koskela --- csrc/ops.hip | 1 - 1 file changed, 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 25c42863e..70e5fdee7 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -630,7 +630,6 @@ template int igemmlt(hipblasLtHandl } else { - //has_error |= checkHipblasStatus(hipblasLtMatmulDescSetAttribute(matmulDesc, hipblasLt_MATMUL_DESC_POINTER_MODE, &alphaVec, sizeof(alphaVec))); float beta = 0.0f; has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); From 1c5bd4ff8bab2103bc0c6579586fb969c8be13f0 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:58:31 -0500 Subject: [PATCH 43/54] Update error reporting Co-authored-by: Aarni Koskela --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 70e5fdee7..a808d5ecb 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -643,7 +643,7 @@ template int igemmlt(hipblasLtHandl if (Adesc) has_error |= checkHipblasStatus(hipblasLtMatrixLayoutDestroy(Adesc)); 
if (matmulDesc) has_error |= checkHipblasStatus(hipblasLtMatmulDescDestroy(matmulDesc)); if(has_error == 1) - printf("error detected\n"); + fprintf(stderr, "error detected\n"); return has_error; #endif // NO_HIPBLASLT From f39ff4869a8b9f76963b9017af669aa10000c376 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 19:03:17 +0000 Subject: [PATCH 44/54] Update error reporting --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index a808d5ecb..4fdc3cbfa 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -618,7 +618,7 @@ template int igemmlt(hipblasLtHandl if (returnedAlgoCount == 0) { has_error = 1; - printf("Error: Matmul Algo Heurisitic didn't return algorithms\n"); + fprintf(stderr, "Error: Matmul Algo Heuristic didn't return algorithms\n"); } else { From f57addd0fe846bfca081058b5157b8abef172d56 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 19:08:06 +0000 Subject: [PATCH 45/54] Create hip diagnostics functions --- bitsandbytes/cextension.py | 2 +- bitsandbytes/diagnostics/cuda.py | 177 +++++++++++++++++-------------- bitsandbytes/diagnostics/main.py | 8 +- 3 files changed, 105 insertions(+), 82 deletions(-) diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 3d08c9084..e322693b5 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -120,7 +120,7 @@ def get_native_library() -> BNBNativeLibrary: hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2]) HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}" - BNB_BACKEND = "ROCM" + BNB_BACKEND = "ROCm" else: HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0 BNB_HIP_VERSION_SHORT = "" diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 038235d59..a6edc3814 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -108,59 +108,38 @@ def 
find_cudart_libraries() -> Iterator[Path]: yield from find_cuda_libraries_in_path_list(value) -def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: - if not HIP_ENVIRONMENT: - print( - f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " - f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", - ) - else: - print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}") +def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: + print( + f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, " + f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.", + ) binary_path = get_cuda_bnb_library_path(cuda_specs) if not binary_path.exists(): - if not HIP_ENVIRONMENT: - print_dedented( - f""" - Library not found: {binary_path}. Maybe you need to compile it from source? - If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, - for example, `make CUDA_VERSION=113`. - - The CUDA version for the compile might depend on your conda install, if using conda. - Inspect CUDA version via `conda list | grep cuda`. - """, - ) - else: - print_dedented( - f""" - Library not found: {binary_path}. - Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION - in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version - and rebuild bitsandbytes. - """, - ) + print_dedented( + f""" + Library not found: {binary_path}. Maybe you need to compile it from source? + If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, + for example, `make CUDA_VERSION=113`. + + The CUDA version for the compile might depend on your conda install, if using conda. + Inspect CUDA version via `conda list | grep cuda`. 
+ """, + ) cuda_major, cuda_minor = cuda_specs.cuda_version_tuple - if not HIP_ENVIRONMENT: - if cuda_major < 11: - print_dedented( - """ - WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). - You will be only to use 8-bit optimizers and quantization routines! - """, - ) - - print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") - else: - if (cuda_major, cuda_minor) < (6, 1): - print_dedented( - """ - WARNING: bitandbytes is fully supported only from ROCm 6.1. - """, - ) + if cuda_major < 11: + print_dedented( + """ + WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). + You will be only to use 8-bit optimizers and quantization routines! + """, + ) + + print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") # 7.5 is the minimum CC for cublaslt - if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: + if not cuda_specs.has_cublaslt: print_dedented( """ WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! @@ -174,44 +153,88 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: # (2) Multiple CUDA versions installed -def print_cuda_runtime_diagnostics() -> None: +def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: + print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}") + + binary_path = get_cuda_bnb_library_path(cuda_specs) + if not binary_path.exists(): + print_dedented( + f""" + Library not found: {binary_path}. + Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION + in PyTorch Settings matches your ROCm install. If not, reinstall PyTorch for your ROCm version + and rebuild bitsandbytes. + """, + ) + + hip_major, hip_minor = cuda_specs.cuda_version_tuple + if (hip_major, hip_minor) < (6, 1): + print_dedented( + """ + WARNING: bitsandbytes is fully supported only from ROCm 6.1. 
+ """, + ) + + +def print_diagnostics(cuda_specs: CUDASpecs)) -> None: + if HIP_ENVIRONMENT: + _print_hip_diagnostics(cuda_specs) + else: + _print_cuda_diagnostics(cuda_specs) + + +def _print_cuda_runtime_diagnostics() -> None: + cudart_paths = list(find_cudart_libraries()) + if not cudart_paths: + print("WARNING! CUDA runtime files not found in any environmental path.") + elif len(cudart_paths) > 1: + print_dedented( + f""" + Found duplicate CUDA runtime files (see below). + + We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, + but this might mismatch with the CUDA version that is needed for bitsandbytes. + To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. + + For example, if you want to use the CUDA version 122, + BNB_CUDA_VERSION=122 python ... + + OR set the environmental variable in your .bashrc: + export BNB_CUDA_VERSION=122 + + In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, + """, + ) + for pth in cudart_paths: + print(f"* Found CUDA runtime at: {pth}") + + +def _print_hip_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.") + print(f"WARNING! ROCm runtime files not found in any environmental path.") elif len(cudart_paths) > 1: - backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip print_dedented( f""" - Found duplicate {BNB_BACKEND} runtime files (see below). + Found duplicate ROCm runtime files (see below). - We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version}, - but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes. + We select the PyTorch default ROCm runtime, which is {torch.version.hip}, + but this might mismatch with the ROCm version that is needed for bitsandbytes. 
+ + To resolve it, install PyTorch built for the ROCm version you want to use + + and set LD_LIBRARY_PATH to your ROCm install path, e.g. + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-6.1.2/lib, """, ) - if not HIP_ENVIRONMENT: - print_dedented( - """ - To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. - - For example, if you want to use the CUDA version 122, - BNB_CUDA_VERSION=122 python ... - - OR set the environmental variable in your .bashrc: - export BNB_CUDA_VERSION=122 - - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, - """, - ) - else: - print_dedented( - """ - To resolve it, install PyTorch built for the ROCm version you want to use - - and set LD_LIBRARY_PATH to your ROCm install path, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2, - """, - ) for pth in cudart_paths: - print(f"* Found {BNB_BACKEND} runtime at: {pth}") + print(f"* Found ROCm runtime at: {pth}") + + +def print_runtime_diagnostics() -> None: + if HIP_ENVIRONMENT: + _print_hip_runtime_diagnostics() + else: + _print_cuda_runtime_diagnostics() diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index 9165cbeed..8dc43ed2a 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -7,8 +7,8 @@ from bitsandbytes.consts import PACKAGE_GITHUB_URL from bitsandbytes.cuda_specs import get_cuda_specs from bitsandbytes.diagnostics.cuda import ( - print_cuda_diagnostics, - print_cuda_runtime_diagnostics, + print_diagnostics, + print_runtime_diagnostics, ) from bitsandbytes.diagnostics.utils import print_dedented, print_header @@ -63,8 +63,8 @@ def main(): print(f"2. {BNB_BACKEND} not installed") print(f"3. 
You have multiple conflicting {BNB_BACKEND} libraries") if cuda_specs: - print_cuda_diagnostics(cuda_specs) - print_cuda_runtime_diagnostics() + print_diagnostics(cuda_specs) + print_runtime_diagnostics() print_header("") print_header("DEBUG INFO END") print_header("") From 251a0e87690cb6bd0a9df16b3b6a0392d4fd7f0d Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 20:03:05 +0000 Subject: [PATCH 46/54] Fix Typo --- bitsandbytes/diagnostics/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index a6edc3814..91b70f3b1 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -176,7 +176,7 @@ def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: ) -def print_diagnostics(cuda_specs: CUDASpecs)) -> None: +def print_diagnostics(cuda_specs: CUDASpecs) -> None: if HIP_ENVIRONMENT: _print_hip_diagnostics(cuda_specs) else: From 260a3ac8da54a4c3519172f0194152db22116829 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 12 Sep 2024 22:01:49 +0000 Subject: [PATCH 47/54] Fix pre-commit checks --- bitsandbytes/diagnostics/cuda.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 91b70f3b1..014b753a9 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -5,7 +5,7 @@ import torch -from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT, get_cuda_bnb_library_path +from bitsandbytes.cextension import HIP_ENVIRONMENT, get_cuda_bnb_library_path from bitsandbytes.consts import NONPYTORCH_DOC_URL from bitsandbytes.cuda_specs import CUDASpecs from bitsandbytes.diagnostics.utils import print_dedented @@ -34,15 +34,16 @@ logger = logging.getLogger(__name__) + def get_runtime_lib_patterns() -> tuple: if HIP_ENVIRONMENT: return ("libamdhip64.so*",) else: return ( - "cudart64*.dll", # Windows - 
"libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. - "nvcuda*.dll", # Windows - ) + "cudart64*.dll", # Windows + "libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc. + "nvcuda*.dll", # Windows + ) def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]: @@ -213,7 +214,7 @@ def _print_cuda_runtime_diagnostics() -> None: def _print_hip_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print(f"WARNING! ROCm runtime files not found in any environmental path.") + print("WARNING! ROCm runtime files not found in any environmental path.") elif len(cudart_paths) > 1: print_dedented( f""" From 7e787da31be97b0128795e2ff916857d1344375d Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 19 Sep 2024 15:25:49 +0000 Subject: [PATCH 48/54] Enable 6.2 build --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d2da82501..21c4c1895 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -107,7 +107,7 @@ jobs: os: [ubuntu-latest] arch: [x86_64] rocm_version: - ["6.1.2"] + ["6.1.2", "6.2"] runs-on: ${{ matrix.os }} # One day, we could run them on native agents. 
Azure supports this now but it's planned only for Q3 2023 for hosted agents steps: - uses: actions/checkout@v4 From ef9e03c07281d4075242c263afcbcfb4facc0c84 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 19 Sep 2024 16:32:23 +0000 Subject: [PATCH 49/54] Skip gemv 4 bit cpu test --- tests/test_functional.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_functional.py b/tests/test_functional.py index a9d926b89..35187db78 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -2303,6 +2303,7 @@ def test_gemv_4bit(dtype, storage_type, quant_storage, double_quant, kind): assert maxratio < 1.02 and maxratio > 0.98 +@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet") @pytest.mark.parametrize("kind", ["fc1", "fc2", "attn", "attn_packed"]) @pytest.mark.parametrize("quant_type", ["nf4", "fp4"]) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=describe_dtype) From e00f4ea1bd07d073f3c75c87487f362f374d91b3 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 20 Sep 2024 20:39:20 +0000 Subject: [PATCH 50/54] Update documentation for 6.2.0 pip install --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 60419b38a..146fb0ddd 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -167,7 +167,7 @@ apt-get update && apt-get install -y git && cd home pip install torch --index-url https://download.pytorch.org/whl/rocm6.1/ # Install bitsandbytes from PyPI -# (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100 +# (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2/6.2.0 and gpu arch - gfx90a, gfx942, gfx1100 # Please install from source if your configuration doesn't match with these) pip install bitsandbytes From 9035efa24099b16beb5a147539d2e1059bb39f39 Mon Sep 
17 00:00:00 2001 From: Prasanth Nunna Date: Fri, 27 Sep 2024 18:37:29 +0000 Subject: [PATCH 51/54] Update README for default branch change --- README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7823168ac..de61acb2b 100644 --- a/README.md +++ b/README.md @@ -6,21 +6,26 @@ The `bitsandbytes` library is a lightweight Python wrapper around CUDA custom fu The library includes quantization primitives for 8-bit & 4-bit operations, through `bitsandbytes.nn.Linear8bitLt` and `bitsandbytes.nn.Linear4bit` and 8-bit optimizers through `bitsandbytes.optim` module. -There are ongoing efforts to support further hardware backends, i.e. Intel CPU + GPU, AMD GPU, Apple Silicon. Windows support is quite far along and is on its way as well. +This fork is actively developed for ROCm and updates are being pushed into `multi-backend-refactor` branch of upstream bitsandbytes. Users can use either of these to run bitsandbytes on AMD GPUs. -**Please head to the official documentation page:** +**Note: The default branch of this fork is switched from `rocm_enabled` to `rocm_enabled_multi_backend`. This is synced with `multi-backend-refactor` branch of upstream, and latest developements are pushed here until upstream branch is merged into `main`.** -**[https://huggingface.co/docs/bitsandbytes/main](https://huggingface.co/docs/bitsandbytes/main)** - -## ALPHA TESTERS WANTED: `multi-backend-refactor` AMD GPU + Intel CPU/GPU specific BNB backend implementations +**Installation for ROCm:** -We're in the process of a complex refactor in order to allow the support of additional hardware backends, other than CUDA, in BNB. The efforts around this are already quite far along and there's plenty of functionality already in place that is in need for users to take a hands-on approach! Mac support will likely soon also see progress. 
However, I recommend waiting 2 weeks until the device abstraction has further consolidated (**breaking changes upcoming**). +For latest develop version: +```bash +git clone --recurse https://github.com/ROCm/bitsandbytes +cd bitsandbytes +git checkout rocm_enabled_multi_backend +pip install -r requirements-dev.txt +cmake -DCOMPUTE_BACKEND=hip -S . #Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch +make +pip install . +``` -Currently, you still need to compile from source, after checking out the `multi-backend-refactor` branch (instructions WIP, but [the current docs on the compilation from source](https://huggingface.co/docs/bitsandbytes/main/en/installation#compile-from-source) are a good starting point; [feel free to share tips / input in this Github discussion](https://github.com/TimDettmers/bitsandbytes/discussions/1219). We'll soon enable nightly releases to make this much easier for you! +**For more details, please head to the official documentation page:** -Please give feedback to us in [this dedicated Github Discussion space](https://github.com/TimDettmers/bitsandbytes/discussions/categories/catch-all-alpha-testing-the-multi-backend-refactor)! - -We're super excited about these recent developments and grateful for any constructive input or support that you can give to help us make this a reality. 
BNB is a community project and we're excited for your collaboration 🤗 +**[https://huggingface.co/docs/bitsandbytes/main](https://huggingface.co/docs/bitsandbytes/main)** ## License From b5614d0a1b99010ecb758ed648b7cb5950649d75 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:48:33 -0500 Subject: [PATCH 52/54] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de61acb2b..5cfe0b71d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu This fork is actively developed for ROCm and updates are being pushed into `multi-backend-refactor` branch of upstream bitsandbytes. Users can use either of these to run bitsandbytes on AMD GPUs. -**Note: The default branch of this fork is switched from `rocm_enabled` to `rocm_enabled_multi_backend`. This is synced with `multi-backend-refactor` branch of upstream, and latest developements are pushed here until upstream branch is merged into `main`.** +**Note: The default branch of this fork is switched from `rocm_enabled` to `rocm_enabled_multi_backend`. 
This is synced periodically with `multi-backend-refactor` branch of upstream, and latest developments are pushed here until upstream branch is merged into `main`.** **Installation for ROCm:** From 592d67031c21c16b04ed6379bbfedfe8157a1f29 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 16 Oct 2024 19:36:10 +0000 Subject: [PATCH 53/54] Sync README with upstream --- README.md | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 5cfe0b71d..7823168ac 100644 --- a/README.md +++ b/README.md @@ -6,26 +6,21 @@ The `bitsandbytes` library is a lightweight Python wrapper around CUDA custom fu The library includes quantization primitives for 8-bit & 4-bit operations, through `bitsandbytes.nn.Linear8bitLt` and `bitsandbytes.nn.Linear4bit` and 8-bit optimizers through `bitsandbytes.optim` module. -This fork is actively developed for ROCm and updates are being pushed into `multi-backend-refactor` branch of upstream bitsandbytes. Users can use either of these to run bitsandbytes on AMD GPUs. +There are ongoing efforts to support further hardware backends, i.e. Intel CPU + GPU, AMD GPU, Apple Silicon. Windows support is quite far along and is on its way as well. -**Note: The default branch of this fork is switched from `rocm_enabled` to `rocm_enabled_multi_backend`. This is synced periodically with `multi-backend-refactor` branch of upstream, and latest developments are pushed here until upstream branch is merged into `main`.** +**Please head to the official documentation page:** -**Installation for ROCm:** +**[https://huggingface.co/docs/bitsandbytes/main](https://huggingface.co/docs/bitsandbytes/main)** -For latest develop version: -```bash -git clone --recurse https://github.com/ROCm/bitsandbytes -cd bitsandbytes -git checkout rocm_enabled_multi_backend -pip install -r requirements-dev.txt -cmake -DCOMPUTE_BACKEND=hip -S . 
#Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch -make -pip install . -``` +## ALPHA TESTERS WANTED: `multi-backend-refactor` AMD GPU + Intel CPU/GPU specific BNB backend implementations -**For more details, please head to the official documentation page:** +We're in the process of a complex refactor in order to allow the support of additional hardware backends, other than CUDA, in BNB. The efforts around this are already quite far along and there's plenty of functionality already in place that is in need for users to take a hands-on approach! Mac support will likely soon also see progress. However, I recommend waiting 2 weeks until the device abstraction has further consolidated (**breaking changes upcoming**). -**[https://huggingface.co/docs/bitsandbytes/main](https://huggingface.co/docs/bitsandbytes/main)** +Currently, you still need to compile from source, after checking out the `multi-backend-refactor` branch (instructions WIP, but [the current docs on the compilation from source](https://huggingface.co/docs/bitsandbytes/main/en/installation#compile-from-source) are a good starting point; [feel free to share tips / input in this Github discussion](https://github.com/TimDettmers/bitsandbytes/discussions/1219). We'll soon enable nightly releases to make this much easier for you! + +Please give feedback to us in [this dedicated Github Discussion space](https://github.com/TimDettmers/bitsandbytes/discussions/categories/catch-all-alpha-testing-the-multi-backend-refactor)! + +We're super excited about these recent developments and grateful for any constructive input or support that you can give to help us make this a reality. 
BNB is a community project and we're excited for your collaboration 🤗 ## License From 233bb7a3cfa294b1402075ced282f211bc35b2ed Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Wed, 16 Oct 2024 19:58:42 +0000 Subject: [PATCH 54/54] Remove depth --- docs/source/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 609865436..d1acb2cd6 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -308,7 +308,7 @@ bitsandbytes is fully supported from ROCm 6.1 onwards (currently in alpha releas ```bash # Install bitsandbytes from source # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch -git clone --depth 1 -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ +git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ # Install dependencies pip install -r requirements-dev.txt