diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml
index 10272be87..a19e7511d 100644
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@@ -14,5 +14,6 @@ jobs:
       commit_sha: ${{ github.sha }}
       package: bitsandbytes
       repo_owner: TimDettmers
+      custom_container: huggingface/transformers-doc-builder
     secrets:
       hf_token: ${{ secrets.HUGGINGFACE_PUSH }}
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index d6455fd11..cc833df5d 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -16,3 +16,4 @@ jobs:
       pr_number: ${{ github.event.number }}
       package: bitsandbytes
       repo_owner: TimDettmers
+      custom_container: huggingface/transformers-doc-builder
diff --git a/bitsandbytes/backends/_subinterfaces.py b/bitsandbytes/backends/_subinterfaces.py
new file mode 100644
index 000000000..7ad062edc
--- /dev/null
+++ b/bitsandbytes/backends/_subinterfaces.py
@@ -0,0 +1,78 @@
+from abc import ABC, abstractmethod
+
+
+class RequiredUtilities(ABC):  # Abstract interface declaring one required hook: check_matmul.
+    @abstractmethod  # subclasses must override this before they can be instantiated
+    def check_matmul():  # NOTE(review): no self/cls and no @staticmethod -- confirm how implementers are meant to define this
+        raise NotImplementedError  # placeholder body; the interface supplies no default behavior
+
+
+class FourBitMatmul(ABC):  # Abstract interface declaring three hooks: quantize_4bit, dequantize_4bit, gemv_4bit.
+    @abstractmethod  # every hook below is abstract: subclasses must override all of them to be instantiable
+    def quantize_4bit():  # NOTE(review): hooks in this class take no self/cls and are not @staticmethod -- confirm intended call convention
+        raise NotImplementedError  # placeholder body; the interface supplies no default behavior
+
+    @abstractmethod
+    def dequantize_4bit():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def gemv_4bit():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+
+class EightBitMatMul(ABC):  # Abstract interface declaring five hooks: mm_dequant, double_quant, extract_outliers, igemmlt, get_col_row_absmax.
+    @abstractmethod  # every hook below is abstract: subclasses must override all of them to be instantiable
+    def mm_dequant():  # NOTE(review): hooks in this class take no self/cls and are not @staticmethod -- confirm intended call convention
+        raise NotImplementedError  # placeholder body; the interface supplies no default behavior
+
+    @abstractmethod
+    def double_quant():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def extract_outliers():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def igemmlt():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_col_row_absmax():  # NOTE(review): class is spelled "MatMul" here but "Matmul" in FourBitMatmul -- confirm which casing is intended
+        raise NotImplementedError
+
+
+class KBitQuantization(ABC):  # Abstract interface declaring four hooks: quantize_blockwise, dequantize_blockwise, estimate_quantiles, create_quant_map.
+    @abstractmethod  # every hook below is abstract: subclasses must override all of them to be instantiable
+    def quantize_blockwise():  # NOTE(review): hooks in this class take no self/cls and are not @staticmethod -- confirm intended call convention
+        raise NotImplementedError  # placeholder body; the interface supplies no default behavior
+
+    @abstractmethod
+    def dequantize_blockwise():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def estimate_quantiles():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+    @abstractmethod
+    def create_quant_map():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+
+class EightBitOptimizer(ABC):  # Abstract interface declaring two hooks: optimizer_update_32bit and optimizer_update_8bit_blockwise.
+    @abstractmethod  # both hooks are abstract: subclasses must override them to be instantiable
+    def optimizer_update_32bit():  # NOTE(review): hooks in this class take no self/cls and are not @staticmethod -- confirm intended call convention
+        """Needed only for testing purposes."""
+        raise NotImplementedError  # placeholder body; the interface supplies no default behavior
+
+    @abstractmethod
+    def optimizer_update_8bit_blockwise():  # signature intentionally left empty here; parameters are not specified by this interface
+        raise NotImplementedError
+
+
+class CompleteBnbAlgorithmsInterface(  # Aggregate ABC: inherits the abstract hooks of all five sub-interfaces listed below.
+    RequiredUtilities, FourBitMatmul, EightBitMatMul, KBitQuantization, EightBitOptimizer
+):
+    pass  # adds no members of its own; a concrete subclass must override every inherited abstract method
diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index c8ae7358d..8927c7688 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -1,21 +1,3 @@
-"""
-extract factors the build is dependent on:
-[X] compute capability
-    [ ] TODO: Q - What if we have multiple GPUs of different makes?
-- CUDA version
-- Software:
-    - CPU-only: only CPU quantization functions (no optimizer, no matrix multiple)
-    - CuBLAS-LT: full-build 8-bit optimizer
-    - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
-
-evaluation:
-    - if paths faulty, return meaningful error
-    - else:
-        - determine CUDA version
-        - determine capabilities
-        - based on that set the default path
-"""
-
 import ctypes as ct
 import logging
 import os