ROCm · TNTran92 · Apr 6, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,134 @@
+# CMake build configuration for the ROCm/HIP port of bitsandbytes.
+# Ensure ROCm (HIP, hipBLAS, hipBLASLt, hipSPARSE, rocRAND, rocThrust) is installed. Then run:
+#   Single-config generators: `cmake -B build . && cmake --build build`
+#   Multi-config generators:  `cmake -B build . && cmake --build build --config Release`
+# You can also use the following options and variables:
+#  - COMPUTE_BACKEND: Set to `hip` (default) or `cpu` to select the backend.
+#  - HIP_PATH: Where HIP is installed. Defaults to $ENV{HIP_PATH}, else `/opt/rocm/hip`.
+#  - PTXAS_VERBOSE: Declared for compatibility; nothing in this file reads it.
+cmake_minimum_required(VERSION 3.22.1)
+
+project(bitsandbytes LANGUAGES CXX)
+
+# If run without specifying a build type, default to using the Release configuration:
+#    it optimizes the generated binaries for performance and also adds the `-DNDEBUG` flag,
+#    which turns off a bunch of asserts which seem to link to new symbols in libstdc++,
+#    worsening our manylinux compliance.
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Define included source files.
+set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.c)
+set(HIP_FILES csrc/ops.hip.cpp csrc/kernels.hip.cpp)
+# C++ sources are always included
+set(SRC_FILES ${CPP_FILES})
+
+set(COMPUTE_BACKEND "hip" CACHE STRING "The compute backend to use (cpu, hip)")
+set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu hip)
+option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)
+
+# Resolve the HIP install prefix: an explicit -DHIP_PATH wins, then the HIP_PATH
+# environment variable, then the conventional /opt/rocm/hip location.
+if(NOT DEFINED HIP_PATH)
+    if(DEFINED ENV{HIP_PATH})
+        set(HIP_PATH "$ENV{HIP_PATH}" CACHE PATH "Path to which HIP has been installed")
+    else()
+        set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+    endif()
+endif()
+message(STATUS "HIP_PATH: ${HIP_PATH}")
+
+# Search for ROCm in common locations. These paths must be in place before the
+# find_package() calls below, otherwise they have no effect on the lookups.
+list(PREPEND CMAKE_MODULE_PATH "${HIP_PATH}/cmake")
+list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
+
+# REQUIRED aborts configuration if a package is missing, so no extra *_FOUND check is needed.
+find_package(HIP REQUIRED)
+message(STATUS "Found HIP: ${HIP_VERSION}")
+find_package(rocthrust REQUIRED)
+find_package(hipblas REQUIRED)
+find_package(hipsparse REQUIRED)
+find_package(rocrand REQUIRED)
+find_package(hipblaslt REQUIRED)
+# The user may override AMDGPU_TARGETS defined in the HIP config file
+# to select the AMDGPU archs to compile for.
+# ex. set(AMDGPU_TARGETS "gfx803;gfx900;gfx906")
+
+# Build with hipcc. The link step of a C++ target is driven by CMAKE_CXX_COMPILER,
+# so there is no separate linker variable to set.
+# NOTE(review): overriding the compiler after project() is fragile; passing
+# -DCMAKE_CXX_COMPILER=<hipcc> at configure time would be more robust.
+if(NOT WIN32)
+    if(HIP_HIPCC_EXECUTABLE)
+        set(CMAKE_CXX_COMPILER "${HIP_HIPCC_EXECUTABLE}")
+    else()
+        message(WARNING "HIP_HIPCC_EXECUTABLE is not set; keeping ${CMAKE_CXX_COMPILER}")
+    endif()
+endif()
+message(STATUS "Current CMAKE_CXX_COMPILER (should show hipcc): ${CMAKE_CXX_COMPILER}")
+
+set(BNB_OUTPUT_NAME "bitsandbytes")
+
+message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND})")
+
+# CUDA and MPS are never built from this file.
+set(BUILD_CUDA OFF)
+set(BUILD_MPS OFF)
+# Quote the expansion so an empty or odd COMPUTE_BACKEND cannot break the if() syntax.
+if("${COMPUTE_BACKEND}" STREQUAL "hip")
+    set(BUILD_HIP ON)
+    set(NO_CUBLASLT ON)
+else()
+    if(NOT "${COMPUTE_BACKEND}" STREQUAL "cpu")
+        message(WARNING "Unknown COMPUTE_BACKEND '${COMPUTE_BACKEND}'; building the cpu variant")
+    endif()
+    set(BUILD_HIP OFF)
+endif()
+
+if(BUILD_HIP)
+    list(APPEND SRC_FILES ${HIP_FILES})
+    string(APPEND BNB_OUTPUT_NAME "_hip_nohipblaslt")
+else()
+    string(APPEND BNB_OUTPUT_NAME "_cpu")
+endif()
+
+# pythonInterface.c has a C suffix, but CXX is the only language this project enables.
+set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
+message(STATUS "Sources: ${SRC_FILES}")
+add_library(bitsandbytes SHARED ${SRC_FILES})
+target_compile_features(bitsandbytes PUBLIC cxx_std_14)
+if(NOT WIN32)
+    target_compile_definitions(bitsandbytes PRIVATE __HIP_PLATFORM_AMD__)
+endif()
+
+if(BUILD_HIP)
+    target_include_directories(bitsandbytes PRIVATE
+        "${PROJECT_SOURCE_DIR}"
+        "${PROJECT_SOURCE_DIR}/include"
+        /opt/rocm/include/rocwmma
+    )
+    target_compile_definitions(bitsandbytes PRIVATE BUILD_HIP)
+    target_compile_definitions(bitsandbytes PUBLIC NO_CUBLASLT)
+    # NOTE(review): the output name says "nohipblaslt" and NO_CUBLASLT is defined, yet
+    # hipblaslt is still required and linked - confirm whether it can be dropped.
+    target_link_libraries(bitsandbytes PUBLIC
+        hip::device
+        roc::rocthrust
+        roc::hipblas
+        roc::hipsparse
+        roc::rocrand
+        roc::rocprim
+        roc::hipblaslt
+    )
+else()
+    # NOTE(review): the cpu variant still links the HIP device target - confirm this is intended.
+    target_link_libraries(bitsandbytes PUBLIC hip::device)
+endif()
+target_include_directories(bitsandbytes PUBLIC csrc include)
+
+set_target_properties(bitsandbytes PROPERTIES
+    OUTPUT_NAME "${BNB_OUTPUT_NAME}"
+    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bitsandbytes"
+)
diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
@@ -3,14 +3,14 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from . import cuda_setup, utils, research
+from . import cuda_setup, research, utils
 from .autograd._functions import (
     MatmulLtState,
     bmm_cublas,
     matmul,
+    matmul_4bit,
     matmul_cublas,
     mm_cublas,
-    matmul_4bit
 )
 from .cextension import COMPILED_WITH_CUDA
 from .nn import modules
@@ -24,6 +24,6 @@
     "optim.optimizer.MockArgs": False,
 }
 
-__version__ = "0.42.0"
+__version__ = "0.43.0"
 
 PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
diff --git a/bitsandbytes/__main__.py b/bitsandbytes/__main__.py
@@ -1,44 +1,16 @@
+import glob
 import os
 import sys
-import shlex
-import subprocess
-
 from warnings import warn
-from typing import Tuple
-from os.path import isdir
 
 import torch
 
 HEADER_WIDTH = 60
 
-def execute_and_return(command_string: str) -> Tuple[str, str]:
-    def _decode(subprocess_err_out_tuple):
-        return tuple(
-            to_decode.decode("UTF-8").strip()
-            for to_decode in subprocess_err_out_tuple
-        )
-
-    def execute_and_return_decoded_std_streams(command_string):
-        return _decode(
-            subprocess.Popen(
-                shlex.split(command_string),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            ).communicate()
-        )
-
-    std_out, std_err = execute_and_return_decoded_std_streams(command_string)
-    return std_out, std_err
-
-def find_file_recursive(folder, filename):
-    folder = shlex.quote(folder)
-    filename = shlex.quote(filename)
-    cmd = f'find {folder} -name {filename}'
-    out, err = execute_and_return(cmd)
-    if len(err) > 0:
-        raise RuntimeError('Something when wrong when trying to find file. Maybe you do not have a linux system?')
 
-    return out
+def find_dynamic_library(folder, filename):  # yield paths of matching libraries anywhere below `folder`
+    for ext in ("so", "dll", "dylib"):  # `filename` is a glob stem (e.g. "*cuda*"); the extension is appended as-is
+        yield from glob.glob(os.path.join(folder, "**", filename + ext), recursive=True)  # "**" only descends with recursive=True
 
 
 def generate_bug_report_information():
@@ -47,38 +19,25 @@ def generate_bug_report_information():
     print_header("")
     print('')
 
-    if 'CONDA_PREFIX' in os.environ:
-        paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*so')
-        print_header("ANACONDA CUDA PATHS")
-        print(paths)
-        print('')
-    if isdir('/usr/local/'):
-        paths = find_file_recursive('/usr/local', '*cuda*so')
-        print_header("/usr/local CUDA PATHS")
-        print(paths)
-        print('')
-
-    if isdir(os.getcwd()):
-        paths = find_file_recursive(os.getcwd(), '*cuda*so')
-        print_header("WORKING DIRECTORY CUDA PATHS")
-        print(paths)
-        print('')
-
-    print_header("LD_LIBRARY CUDA PATHS")
-    if 'LD_LIBRARY_PATH' in os.environ:
-        lib_path = os.environ['LD_LIBRARY_PATH'].strip()
-        for path in set(lib_path.split(':')):
-            try:
-                if isdir(path):
-                    print_header(f"{path} CUDA PATHS")
-                    paths = find_file_recursive(path, '*cuda*so')
-                    print(paths)
-            except:
-                print(f'Could not read LD_LIBRARY_PATH: {path}')
-    print('')
-
-
-
+    path_sources = [  # (report header, directory to scan); the directory is None when its env var is unset
+        ("ANACONDA CUDA PATHS", os.environ.get("CONDA_PREFIX")),
+        ("/usr/local CUDA PATHS", "/usr/local"),
+        ("CUDA PATHS", os.environ.get("CUDA_PATH")),
+        ("WORKING DIRECTORY CUDA PATHS", os.getcwd()),
+    ]
+    try:
+        ld_library_path = os.environ.get("LD_LIBRARY_PATH")
+        if ld_library_path:
+            for path in set(ld_library_path.strip().split(os.pathsep)):  # set() drops duplicate entries
+                path_sources.append((f"LD_LIBRARY_PATH {path} CUDA PATHS", path))
+    except Exception as e:
+        print(f"Could not parse LD_LIBRARY_PATH: {e}")
+
+    for name, path in path_sources:
+        if path and os.path.isdir(path):  # skip unset variables and entries that are not directories
+            print_header(name)
+            print(list(find_dynamic_library(path, '*cuda*')))
+            print("")
 
 
 def print_header(
@@ -89,67 +48,61 @@ def print_header(
 
 
 def print_debug_info() -> None:
+    from . import PACKAGE_GITHUB_URL
     print(
         "\nAbove we output some debug information. Please provide this info when "
         f"creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose ...\n"
     )
 
 
-generate_bug_report_information()
+def main():
+    generate_bug_report_information()
 
+    from . import COMPILED_WITH_CUDA
+    from .cuda_setup.main import get_compute_capabilities
 
-from . import COMPILED_WITH_CUDA, PACKAGE_GITHUB_URL
-from .cuda_setup.env_vars import to_be_ignored
-from .cuda_setup.main import get_compute_capabilities
-
+    print_header("OTHER")
+    print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
+    print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
+    print_header("")
+    print_header("DEBUG INFO END")
+    print_header("")
+    print("Checking that the library is importable and CUDA is callable...")
+    print("\nWARNING: Please be sure to sanitize sensitive info from any such env vars!\n")
 
-print_header("OTHER")
-print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
-print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
-print_header("")
-print_header("DEBUG INFO END")
-print_header("")
-print(
-    """
-Running a quick check that:
-    + library is importable
-    + CUDA function is callable
-"""
-)
-print("\nWARNING: Please be sure to sanitize sensible info from any such env vars!\n")
+    try:
+        from bitsandbytes.optim import Adam
 
-try:
-    from bitsandbytes.optim import Adam
+        p = torch.nn.Parameter(torch.rand(10, 10).cuda())
+        a = torch.rand(10, 10).cuda()
 
-    p = torch.nn.Parameter(torch.rand(10, 10).cuda())
-    a = torch.rand(10, 10).cuda()
+        p1 = p.data.sum().item()
 
-    p1 = p.data.sum().item()
+        adam = Adam([p])
 
-    adam = Adam([p])
+        out = a * p
+        loss = out.sum()
+        loss.backward()
+        adam.step()
 
-    out = a * p
-    loss = out.sum()
-    loss.backward()
-    adam.step()
+        p2 = p.data.sum().item()
 
-    p2 = p.data.sum().item()
+        assert p1 != p2
+        print("SUCCESS!")
+        print("Installation was successful!")
+    except ImportError:
+        print()
+        warn(
+            f"WARNING: {__package__} is currently running as CPU-only!\n"
+            "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
+            f"If you think that this is so erroneously,\nplease report an issue!"
+        )
+        print_debug_info()
+    except Exception as e:
+        print(e)
+        print_debug_info()
+        sys.exit(1)
 
-    assert p1 != p2
-    print("SUCCESS!")
-    print("Installation was successful!")
-    sys.exit(0)
 
-except ImportError:
-    print()
-    warn(
-        f"WARNING: {__package__} is currently running as CPU-only!\n"
-        "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
-        f"If you think that this is so erroneously,\nplease report an issue!"
-    )
-    print_debug_info()
-    sys.exit(0)
-except Exception as e:
-    print(e)
-    print_debug_info()
-    sys.exit(1)
+if __name__ == "__main__":
+    main()
diff --git a/bitsandbytes/autograd/__init__.py b/bitsandbytes/autograd/__init__.py
@@ -1 +1 @@
-from ._functions import undo_layout, get_inverse_transform_indices
+from ._functions import get_inverse_transform_indices, undo_layout
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		from ._functions import undo_layout, get_inverse_transform_indices
		from ._functions import get_inverse_transform_indices, undo_layout