diff --git a/docs/source/faqs.mdx b/docs/source/faqs.mdx
index b9549e9d8..b95a1d799 100644
--- a/docs/source/faqs.mdx
+++ b/docs/source/faqs.mdx
@@ -1,6 +1,6 @@
 # FAQs
 
-Please submit your questions in [this Github Discussion thread](https://github.com/TimDettmers/bitsandbytes/discussions/1013) if you feel that they will likely affect a lot of other users and that they haven't been sufficiently covered in the documentation.
+Please submit your questions in [this GitHub Discussion thread](https://github.com/bitsandbytes-foundation/bitsandbytes/discussions/1013) if you feel that they are likely to affect many other users and haven't been sufficiently covered in the documentation.
 
 We'll pick the most generally applicable ones and post the QAs here or integrate them into the general documentation (also feel free to submit doc PRs, please).
 
diff --git a/docs/source/fsdp_qlora.md b/docs/source/fsdp_qlora.md
index c67202dc6..45fe0949b 100644
--- a/docs/source/fsdp_qlora.md
+++ b/docs/source/fsdp_qlora.md
@@ -5,7 +5,7 @@ FSDP-QLoRA combines data parallelism (FSDP enables sharding model parameters, op
 This guide provides a brief guide on how bitsandbytes supports storing quantized weights to enable FSDP-QLoRA, and how to run training with the Hugging Face libraries.
 
 > [!TIP]
-> Other changes required for bitsandbytes to support FSDP-QLoRA, such as reconstructing the weights from the quantization metadata and preventing quantizing already quantized weights when they're moved from a CPU to GPU, are documented in this [Pull Request](https://github.com/TimDettmers/bitsandbytes/pull/970) and described in the [Enabling 70B Finetuning on Consumer GPUs](https://www.answer.ai/posts/2024-03-14-fsdp-qlora-deep-dive) blog post. We highly recommend reading these resources for a better understanding of FSDP-QLoRA!
+> Other changes required for bitsandbytes to support FSDP-QLoRA, such as reconstructing the weights from the quantization metadata and preventing re-quantization of already-quantized weights when they're moved from CPU to GPU, are documented in this [Pull Request](https://github.com/bitsandbytes-foundation/bitsandbytes/pull/970) and described in the [Enabling 70B Finetuning on Consumer GPUs](https://www.answer.ai/posts/2024-03-14-fsdp-qlora-deep-dive) blog post. We highly recommend reading these resources for a better understanding of FSDP-QLoRA!
 
 ## Quantized data storage
 
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index da4ab3b44..1eea3247b 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -19,7 +19,7 @@ Welcome to the installation guide for the `bitsandbytes` library! This document
 
 ## CUDA[[cuda]]
 
-`bitsandbytes` is currently only supported on CUDA GPUs for CUDA versions **11.0 - 12.6**. However, there's an ongoing multi-backend effort under development, which is currently in alpha. If you're interested in providing feedback or testing, check out [the multi-backend section below](#multi-backend).
+`bitsandbytes` is currently only supported on CUDA GPUs for CUDA versions **11.0 - 12.8**. However, there's an ongoing multi-backend effort under development, which is currently in alpha. If you're interested in providing feedback or testing, check out [the multi-backend section below](#multi-backend).
 
 ### Supported CUDA Configurations[[cuda-pip]]
 
@@ -28,10 +28,8 @@ The latest version of the distributed `bitsandbytes` package is built with the f
 | **OS**      | **CUDA Toolkit** | **Host Compiler**    |
 |-------------|------------------|----------------------|
 | **Linux**   | 11.7 - 12.3      | GCC 11.4             |
-|             | 12.4 - 12.6      | GCC 13.2             |
-| **Windows** | 11.7 - 12.6      | MSVC 19.42+ (VS2022) |
-|             | 12.4+            | GCC 13.2             |
-| **Windows** | 11.7 - 12.6      | MSVC 19.38+ (VS2022) |
+|             | 12.4 - 12.8      | GCC 13.2             |
+| **Windows** | 11.7 - 12.8      | MSVC 19.42+ (VS2022) |
 
 For CUDA systems, ensure your hardware meets the following requirements:
 
@@ -104,7 +102,6 @@ Now to install the bitsandbytes package from source, run the following commands:
 
 ```bash
 git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
-pip install -r requirements-dev.txt
 cmake -DCOMPUTE_BACKEND=cuda -S .
 make
 pip install -e .   # `-e` for "editable" install, when developing BNB (otherwise leave that out)
@@ -152,7 +149,7 @@ Then locally install the CUDA version you need with this script from bitsandbyte
 
 ```bash
 wget https://raw.githubusercontent.com/bitsandbytes-foundation/bitsandbytes/main/install_cuda.sh
 # Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
-# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126}
+# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 128}
 # EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
 # For example, the following installs CUDA 12.6 to ~/local/cuda-12.6 and exports the path to your .bashrc
@@ -228,7 +225,7 @@
 pip install "transformers>=4.45.1"
 
 > [!WARNING]
-> Pre-compiled binaries are only built for ROCm versions `6.1.0`/`6.1.1`/`6.1.2`/`6.2.0` and `gfx90a`, `gfx942`, `gfx1100` GPU architectures. [Find the pip install instructions here](#multi-backend-pip).
+> Pre-compiled binaries are only built for ROCm versions `6.1.2`/`6.2.4`/`6.3.2` and `gfx90a`, `gfx942`, `gfx1100` GPU architectures. [Find the pip install instructions here](#multi-backend-pip).
 >
 > Other supported versions that don't come with pre-compiled binaries [can be compiled for with these instructions](#multi-backend-compile).
 >
@@ -320,9 +317,6 @@ bitsandbytes is fully supported from ROCm 6.1 onwards (currently in alpha releas
 # Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch
 git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
 
-# Install dependencies
-pip install -r requirements-dev.txt
-
 # Compile & install
 apt-get install -y build-essential cmake # install build tools dependencies, unless present
 cmake -DCOMPUTE_BACKEND=hip -S . # Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch
@@ -345,7 +339,6 @@ The below commands are for Linux. For installing on Windows, please adapt the be
 ```
 git clone --depth 1 -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
 pip install intel_extension_for_pytorch
-pip install -r requirements-dev.txt
 cmake -DCOMPUTE_BACKEND=cpu -S .
 make
 pip install -e . # `-e` for "editable" install, when developing BNB (otherwise leave that out)
@@ -365,9 +358,6 @@ pip install -e . # `-e` for "editable" install, when developing BNB (otherwise
 # Clone bitsandbytes repo, Ascend NPU backend is currently enabled on multi-backend-refactor branch
 git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
 
-# Install dependencies
-pip install -r requirements-dev.txt
-
 # Compile & install
 apt-get install -y build-essential cmake # install build tools dependencies, unless present
 cmake -DCOMPUTE_BACKEND=npu -S .
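The `install_cuda.sh` change adds `128` to the accepted `CUDA_VERSION` tokens, which are just dotted CUDA releases with the dot removed (`12.8` → `128`, `11.0` → `110`). A minimal sketch of that mapping; `to_cuda_version_token` is a hypothetical helper for illustration, not part of the script:

```shell
# Hypothetical helper: map a dotted CUDA release (e.g. "12.8") to the
# bare CUDA_VERSION token that install_cuda.sh expects (e.g. "128").
to_cuda_version_token() {
  # Strip the dot from the version string
  printf '%s\n' "$1" | tr -d '.'
}

to_cuda_version_token "12.8"   # prints 128
to_cuda_version_token "11.0"   # prints 110
```

Under this assumption, the example invocation in the docs would be `bash install_cuda.sh 126 ~/local 1` for CUDA 12.6.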