Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade fro 0.42 to 0.43 #18

Draft
wants to merge 1 commit into
base: rocm_enabled
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# This CMake config hopefully makes it easier to compile.
# Ensure the CUDA Toolkit is available on your path. Then run:
# For GCC: `cmake -B build . && cmake --build build`
# For MSVC: `cmake -B build . && cmake --build build --config Release`
# You can also use the following options and variables
# - COMPUTE_BACKEND: Set to `cpu`, `cuda`, or `mps` to select the backend
# - NO_CUBLASLT: Default OFF, will skip building/linking CUBLASLT support
# - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
# is whatever CMake finds on your path.
# - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
# Separate by semicolons, i.e. `-DCOMPUTE_CAPABILITY=89;90`
# Check your compute capability here: https://developer.nvidia.com/cuda-gpus
# - PTXAS_VERBOSE: Pass the `-v` option to the PTX Assembler
cmake_minimum_required(VERSION 3.22.1)

project(bitsandbytes LANGUAGES CXX)

# If run without specifying a build type, default to using the Release configuration:
# optimizing the generated binaries for performance and also adds the `-DNDEBUG` flag,
# which turns off a bunch of asserts which seem to link to new symbols in libstdc++,
# worsening our many_linux compliance..
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

# Define included source files
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.c)
set(CUDA_FILES csrc/ops.hip.cpp csrc/kernels.hip.cpp)
# C++ sources are always included
list(APPEND SRC_FILES ${CPP_FILES})

set(COMPUTE_BACKEND "hip" CACHE STRING "The compute backend to use (cpu, hip)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda mps hip)
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
endif()
endif()
message("HIP_PATH: " ${HIP_PATH})
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
find_package(HIP REQUIRED)
if (HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
else()
message(FATAL_ERROR "Could not find HIP")
endif()
find_package(rocthrust REQUIRED)
find_package(hipblas REQUIRED)
find_package(hipsparse REQUIRED)
find_package(rocrand REQUIRED)
find_package(hipblaslt REQUIRED)
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm /opt/rocm)
list(APPEND HIP_PATH /opt/rocm/llvm/bin/)
# Find HIP.
# The user may override AMDGPU_TARGETS defined in the HIP config file
# to select the AMDGPU archs to compile for.
# ex. set(AMDGPU_TARGETS "gfx803;gfx900;gfx906")
# Find OpenMP.
#find_package(OpenMP REQUIRED)
# Set compiler and linker.
if(NOT WIN32)
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXXFLAGS -D__HIP_PLATFORM_AMD__)
set(CMAKE_CFLAGS -D__HIP_PLATFORM_AMD__)
endif()
message("Current CMAKE_CXX_COMPILER (should show hipcc): " ${CMAKE_CXX_COMPILER})
message("Current CMAKE_CXX_LINKER (should show hipcc): " ${CMAKE_CXX_LINKER})

set(BNB_OUTPUT_NAME "bitsandbytes")

message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND})")

if(${COMPUTE_BACKEND} STREQUAL "hip")
set(BUILD_HIP on)
set(BUILD_CUDA OFF)
set(BUILD_MPS OFF)
set(NO_CUBLASLT ON)
else()
set(BUILD_CUDA OFF)
set(BUILD_MPS OFF)
endif()


if(BUILD_HIP)
list(APPEND SRC_FILES ${CUDA_FILES})
# real name
string(APPEND BNB_OUTPUT_NAME "_hip_nohipblaslt")
add_compile_definitions(BUILD_HIP)
else()
string(APPEND BNB_OUTPUT_NAME "_cpu")
set(GPU_SOURCES)
endif()

if (BUILD_HIP)
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
message("Working on: " ${CPP_FILES})
add_library(bitsandbytes SHARED ${SRC_FILES})
target_include_directories(bitsandbytes PRIVATE ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/include /opt/rocm/include/rocwmma)
target_compile_features(bitsandbytes PUBLIC cxx_std_14)
target_compile_definitions(bitsandbytes PUBLIC NO_CUBLASLT)
target_include_directories(bitsandbytes PUBLIC csrc include)
target_link_libraries(bitsandbytes PUBLIC hip::device roc::rocthrust roc::hipblas roc::hipsparse roc::rocrand roc::rocprim roc::hipblaslt )
else()
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
add_library(bitsandbytes SHARED ${SRC_FILES})
target_compile_features(bitsandbytes PUBLIC cxx_std_14)
target_include_directories(bitsandbytes PUBLIC csrc include)
target_link_libraries(bitsandbytes PUBLIC hip::device)
endif()

set_target_properties(bitsandbytes PROPERTIES OUTPUT_NAME ${BNB_OUTPUT_NAME})
set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bitsandbytes")
6 changes: 3 additions & 3 deletions bitsandbytes/__init__.py
Original file line number Diff line number Diff line change
@@ -3,14 +3,14 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from . import cuda_setup, utils, research
from . import cuda_setup, research, utils
from .autograd._functions import (
MatmulLtState,
bmm_cublas,
matmul,
matmul_4bit,
matmul_cublas,
mm_cublas,
matmul_4bit
)
from .cextension import COMPILED_WITH_CUDA
from .nn import modules
@@ -24,6 +24,6 @@
"optim.optimizer.MockArgs": False,
}

__version__ = "0.42.0"
__version__ = "0.43.0"

PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
175 changes: 64 additions & 111 deletions bitsandbytes/__main__.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,16 @@
import glob
import os
import sys
import shlex
import subprocess

from warnings import warn
from typing import Tuple
from os.path import isdir

import torch

HEADER_WIDTH = 60

def execute_and_return(command_string: str) -> Tuple[str, str]:
def _decode(subprocess_err_out_tuple):
return tuple(
to_decode.decode("UTF-8").strip()
for to_decode in subprocess_err_out_tuple
)

def execute_and_return_decoded_std_streams(command_string):
return _decode(
subprocess.Popen(
shlex.split(command_string),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
).communicate()
)

std_out, std_err = execute_and_return_decoded_std_streams(command_string)
return std_out, std_err

def find_file_recursive(folder, filename):
folder = shlex.quote(folder)
filename = shlex.quote(filename)
cmd = f'find {folder} -name {filename}'
out, err = execute_and_return(cmd)
if len(err) > 0:
raise RuntimeError('Something when wrong when trying to find file. Maybe you do not have a linux system?')

return out
def find_dynamic_library(folder, filename):
for ext in ("so", "dll", "dylib"):
yield from glob.glob(os.path.join(folder, "**", filename + ext))


def generate_bug_report_information():
@@ -47,38 +19,25 @@ def generate_bug_report_information():
print_header("")
print('')

if 'CONDA_PREFIX' in os.environ:
paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*so')
print_header("ANACONDA CUDA PATHS")
print(paths)
print('')
if isdir('/usr/local/'):
paths = find_file_recursive('/usr/local', '*cuda*so')
print_header("/usr/local CUDA PATHS")
print(paths)
print('')

if isdir(os.getcwd()):
paths = find_file_recursive(os.getcwd(), '*cuda*so')
print_header("WORKING DIRECTORY CUDA PATHS")
print(paths)
print('')

print_header("LD_LIBRARY CUDA PATHS")
if 'LD_LIBRARY_PATH' in os.environ:
lib_path = os.environ['LD_LIBRARY_PATH'].strip()
for path in set(lib_path.split(':')):
try:
if isdir(path):
print_header(f"{path} CUDA PATHS")
paths = find_file_recursive(path, '*cuda*so')
print(paths)
except:
print(f'Could not read LD_LIBRARY_PATH: {path}')
print('')



path_sources = [
("ANACONDA CUDA PATHS", os.environ.get("CONDA_PREFIX")),
("/usr/local CUDA PATHS", "/usr/local"),
("CUDA PATHS", os.environ.get("CUDA_PATH")),
("WORKING DIRECTORY CUDA PATHS", os.getcwd()),
]
try:
ld_library_path = os.environ.get("LD_LIBRARY_PATH")
if ld_library_path:
for path in set(ld_library_path.strip().split(os.pathsep)):
path_sources.append((f"LD_LIBRARY_PATH {path} CUDA PATHS", path))
except Exception as e:
print(f"Could not parse LD_LIBRARY_PATH: {e}")

for name, path in path_sources:
if path and os.path.isdir(path):
print_header(name)
print(list(find_dynamic_library(path, '*cuda*')))
print("")


def print_header(
@@ -89,67 +48,61 @@ def print_header(


def print_debug_info() -> None:
from . import PACKAGE_GITHUB_URL
print(
"\nAbove we output some debug information. Please provide this info when "
f"creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose ...\n"
)


generate_bug_report_information()
def main():
generate_bug_report_information()

from . import COMPILED_WITH_CUDA
from .cuda_setup.main import get_compute_capabilities

from . import COMPILED_WITH_CUDA, PACKAGE_GITHUB_URL
from .cuda_setup.env_vars import to_be_ignored
from .cuda_setup.main import get_compute_capabilities

print_header("OTHER")
print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
print_header("")
print_header("DEBUG INFO END")
print_header("")
print("Checking that the library is importable and CUDA is callable...")
print("\nWARNING: Please be sure to sanitize sensitive info from any such env vars!\n")

print_header("OTHER")
print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
print_header("")
print_header("DEBUG INFO END")
print_header("")
print(
"""
Running a quick check that:
+ library is importable
+ CUDA function is callable
"""
)
print("\nWARNING: Please be sure to sanitize sensible info from any such env vars!\n")
try:
from bitsandbytes.optim import Adam

try:
from bitsandbytes.optim import Adam
p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()

p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()
p1 = p.data.sum().item()

p1 = p.data.sum().item()
adam = Adam([p])

adam = Adam([p])
out = a * p
loss = out.sum()
loss.backward()
adam.step()

out = a * p
loss = out.sum()
loss.backward()
adam.step()
p2 = p.data.sum().item()

p2 = p.data.sum().item()
assert p1 != p2
print("SUCCESS!")
print("Installation was successful!")
except ImportError:
print()
warn(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
print_debug_info()
except Exception as e:
print(e)
print_debug_info()
sys.exit(1)

assert p1 != p2
print("SUCCESS!")
print("Installation was successful!")
sys.exit(0)

except ImportError:
print()
warn(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
print_debug_info()
sys.exit(0)
except Exception as e:
print(e)
print_debug_info()
sys.exit(1)
if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion bitsandbytes/autograd/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ._functions import undo_layout, get_inverse_transform_indices
from ._functions import get_inverse_transform_indices, undo_layout
Loading