
Commit

Merge remote-tracking branch 'main/main' into upstream_device_abstraction
jianan-gu committed Feb 7, 2024
2 parents d20c017 + 88ab630 commit 9f23308
Showing 7 changed files with 51 additions and 36 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -136,7 +136,7 @@ elseif(BUILD_MPS)
VERBATIM)
add_custom_target(metallib DEPENDS "bitsandbytes/bitsandbytes.metallib")
else()
set(LIBSUFFIX "cpu")
string(APPEND BNB_OUTPUT_NAME "_cpu")
set(GPU_SOURCES)
endif()

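The `else()` branch above now appends `_cpu` to `BNB_OUTPUT_NAME` instead of setting a separate `LIBSUFFIX`, so CPU-only builds produce a library such as `libbitsandbytes_cpu.so`. A minimal Python sketch of how such a file name is assembled (`native_library_name` is a hypothetical helper, not the repository's loader):

```python
# Illustrative sketch: build the expected native library file name from a
# backend tag plus the platform's shared-library conventions.
import platform

def native_library_name(backend_tag: str = "cpu") -> str:
    suffix = {"Darwin": ".dylib", "Windows": ".dll", "Linux": ".so"}.get(platform.system(), ".so")
    prefix = "" if platform.system() == "Windows" else "lib"
    return f"{prefix}bitsandbytes_{backend_tag}{suffix}"

print(native_library_name())           # e.g. "libbitsandbytes_cpu.so" on Linux
print(native_library_name("cuda121"))  # e.g. "libbitsandbytes_cuda121.so"
```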
5 changes: 2 additions & 3 deletions bitsandbytes/device_setup/cuda/main.py
@@ -28,16 +28,15 @@

from .env_vars import get_potentially_lib_path_containing_env_vars

if platform.system() == 'Windows': # Windows
DYNAMIC_LIBRARY_SUFFIX = { "Darwin": ".dylib", "Windows": ".dll", "Linux": ".so"}.get(platform.system(), ".so")
if platform.system() == "Windows": # Windows
    CUDA_RUNTIME_LIBS = ["nvcuda.dll"]
    DYNAMIC_LIBRARY_SUFFIX = ".dll"
else: # Linux or other
    # These are the most common lib names. libcudart.so is missing by default
    # for a conda install with PyTorch 2.0; instead we get libcudart.so.11.0,
    # which previously caused a lot of errors.
    # Not sure if libcudart.so.12.0 exists in PyTorch installs, but checking for it does not hurt.
    CUDA_RUNTIME_LIBS = ["libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0", "libcudart.so.12.1", "libcudart.so.12.2"]
    DYNAMIC_LIBRARY_SUFFIX = ".so"


class CUDASetup:
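The hunk above replaces the per-branch suffix assignments with a single platform-keyed lookup. A minimal sketch of how the `CUDA_RUNTIME_LIBS` candidates might be used (`find_cuda_runtime` is a hypothetical helper, not the module's actual search logic):

```python
# Minimal sketch: scan candidate directories for any of the known CUDA
# runtime file names and return the first hit.
from pathlib import Path

CUDA_RUNTIME_LIBS = [
    "libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0",
    "libcudart.so.12.1", "libcudart.so.12.2",
]

def find_cuda_runtime(search_dirs):
    for directory in map(Path, search_dirs):
        for name in CUDA_RUNTIME_LIBS:
            candidate = directory / name
            if candidate.is_file():
                return candidate
    return None

print(find_cuda_runtime(["/usr/local/cuda/lib64", "/usr/lib/x86_64-linux-gnu"]))
```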
55 changes: 32 additions & 23 deletions docs/source/compiling.mdx
@@ -1,40 +1,49 @@
# Compiling from Source[[compiling]]

To compile from source, the CUDA Toolkit is required. Ensure `nvcc` is installed; if not, follow these steps to install it along with the CUDA Toolkit:
## Linux

To compile from source, you need the following:

* The ability to compile C++ (gcc, make, headers, etc)
* CMake (version 3.22.1 or newer)
* Python 3.10 or newer
* [The CUDA toolkit](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) (nvcc)

On Ubuntu, install the first two with `apt-get install -y build-essential cmake`.

To install the CUDA toolkit, follow the [instructions from your distro](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).

```bash
wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/install_cuda.sh
# Use the following syntax: cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
# CUDA_VERSION options include 110 to 122
# EXPORT_TO_BASH: 0 for False, 1 for True

# Example for installing CUDA 11.7 at ~/local/cuda-11.7 and exporting the path to .bashrc:
bash install_cuda.sh 117 ~/local 1
```

For a single compile run with a specific CUDA version, set `CUDA_HOME` to point to your CUDA installation directory. For instance, to compile using CUDA 11.7 located at `~/local/cuda-11.7`, use:
To install the package from source, run:

```bash
CUDA_HOME=~/local/cuda-11.7 CUDA_VERSION=117 make cuda11x
pip install -r requirements-dev.txt
cmake -DCOMPUTE_BACKEND=cuda -S .
make
pip install .
```
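After `pip install .`, a quick smoke test confirms the build loaded; this is a sketch, not part of the docs (`python -m bitsandbytes`, referenced elsewhere on this page, prints a fuller diagnostic):

```python
# Importing the package forces the compiled native library to load.
import importlib.metadata
import bitsandbytes  # raises if the native library failed to load

print(importlib.metadata.version("bitsandbytes"))
```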

## General Compilation Steps
If you have multiple versions of CUDA installed, or have installed it in a non-standard location, please refer to the [CMake CUDA documentation](https://cliutils.gitlab.io/modern-cmake/chapters/packages/CUDA.html) for how to configure which CUDA compiler is used.
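A quick way to see which CUDA runtime your PyTorch build targets, so the toolkit you point CMake at matches it (assumes PyTorch is installed):

```python
import torch

print(torch.version.cuda)         # e.g. "12.1"; None for CPU-only builds
print(torch.cuda.is_available())  # True if a usable GPU and driver are present
```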

1. Use `CUDA_VERSION=XXX make [target]` to compile, where `[target]` includes options like `cuda92`, `cuda10x`, `cuda11x`, and others.
2. Install with `python setup.py install`.
## Windows

Ensure `nvcc` is available in your system. If using Anaconda, determine your CUDA version with PyTorch using `conda list | grep cudatoolkit` and match it by downloading the corresponding version from the [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive).
The following is required to install from source on Windows:

To install CUDA locally without administrative rights:
* [Microsoft Visual Studio](https://visualstudio.microsoft.com/downloads/) with C++ support
* CMake (version 3.22.1 or newer)
* Python 3.10 or newer
* [The CUDA toolkit](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) (nvcc)

```bash
wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/install_cuda.sh
# Follow the same syntax and example as mentioned earlier
```

The compilation process relies on the `CUDA_HOME` environment variable to locate CUDA. If `CUDA_HOME` is unset, it will attempt to infer the location from `nvcc`. If `nvcc` is not in your path, you may need to add it or set `CUDA_HOME` manually. For example, if `python -m bitsandbytes` indicates your CUDA path as `/usr/local/cuda-11.7`, you can set `CUDA_HOME` to this path.
To install the CUDA toolkit, follow the [instructions for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).

If compilation issues arise, please report them.
To install the package from source, run:
```bash
pip install -r requirements-dev.txt
cmake -DCOMPUTE_BACKEND=cuda -S .
cmake --build . --config Release
pip install .
```

## Compilation for Kepler Architecture

6 changes: 3 additions & 3 deletions docs/source/index.mdx
@@ -8,9 +8,9 @@ There are ongoing efforts to support further hardware backends, i.e. Intel CPU +

## API documentation

- [Linear4bit](quantizaton#linear4bit)
- [Linear8bit](quantizaton#linear8bit)
- [StableEmbedding](optimizers#stableembedding)
- [Quantization](quantization)
- [Integrations](integrations)
- [Optimizers](optimizers)

# License

15 changes: 11 additions & 4 deletions docs/source/installation.mdx
@@ -21,25 +21,32 @@ pip install bitsandbytes

### From source

You need CMake and Python installed. For Linux, make sure to install a compiler (`apt install build-essential`, for example).

```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
cmake -B build -DBUILD_CUDA=ON -S .
pip install -r requirements-dev.txt
cmake -DCOMPUTE_BACKEND=cuda -S .
make
pip install .
```
Note support for non-CUDA GPUs (e.g. AMD, Intel), is also coming soon.
Note that support for non-CUDA GPUs (e.g. AMD, Intel, Apple Silicon) is also coming soon.
For a more detailed compilation guide, head to the [dedicated page on the topic](./compiling).

</hfoption>
<hfoption id="Windows">

## Windows

Windows builds require Visual Studio with C++ support, as well as the CUDA SDK.

Currently for Windows users, you need to build bitsandbytes from source:

```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
cmake -B build -DBUILD_CUDA=ON -S .
cmake --build build --config Release
pip install -r requirements-dev.txt
cmake -DCOMPUTE_BACKEND=cuda -S .
cmake --build . --config Release
python -m build --wheel
```

2 changes: 1 addition & 1 deletion docs/source/integrations.mdx
@@ -4,7 +4,7 @@ With Transformers it's very easy to load any model in 4 or 8-bit, quantizing the

Please review the [bitsandbytes section in the Accelerate docs](https://huggingface.co/docs/transformers/v4.37.2/en/quantization#bitsandbytes).

Details about the BitsAndBytesConfig can be found here](https://huggingface.co/docs/transformers/v4.37.2/en/main_classes/quantization#transformers.BitsAndBytesConfig).
Details about the BitsAndBytesConfig can be found [here](https://huggingface.co/docs/transformers/v4.37.2/en/main_classes/quantization#transformers.BitsAndBytesConfig).

## Beware: bf16 is an optional compute data type
If your hardware supports it, `bf16` is the optimal compute dtype. The default is `float32` for backward compatibility and numerical stability. `float16` often leads to numerical instabilities, but `bfloat16` provides the benefits of both worlds: numerical stability and significant computation speedup. Therefore, be sure to check if your hardware supports `bf16` and configure it using the `bnb_4bit_compute_dtype` parameter in BitsAndBytesConfig:
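(The diff view truncates the original snippet here; below is a minimal sketch of such a configuration, assuming the standard Transformers API, not the exact example from the file.)

```python
# Minimal sketch of the configuration described above.
import torch
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # prefer bf16 where hardware supports it
)
```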
2 changes: 1 addition & 1 deletion docs/source/optimizers.mdx
@@ -184,7 +184,7 @@ class MyModule(torch.nn.Module):

Here we'll provide further auto-generated API docs soon. Please feel free to contribute doc-strings for the respective optimizers, as `bitsandbytes` is a community effort.

## StableEmbedding
### StableEmbedding[[stable-emb-api]]

[[autodoc]] bitsandbytes.nn.StableEmbedding
- __init__
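Until the auto-generated docs land, a minimal usage sketch (assuming a standard PyTorch environment; not from the original file):

```python
# StableEmbedding mirrors torch.nn.Embedding but adds layer norm and is
# intended to be trained with 32-bit optimizer state for stability.
import torch
import bitsandbytes as bnb

emb = bnb.nn.StableEmbedding(num_embeddings=1000, embedding_dim=64)
token_ids = torch.randint(0, 1000, (2, 8))
print(emb(token_ids).shape)  # torch.Size([2, 8, 64])
```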
