diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..140753af4
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,132 @@
+# This CMake config hopefully makes it easier to compile.
+# Ensure the CUDA Toolkit is available on your path. Then run:
+#   For  GCC: `cmake -B build . && cmake --build build`
+#   For MSVC: `cmake -B build . && cmake --build build --config Release`
+# You can also use the following options
+#  - BUILD_CUDA: Default ON, will build with CUDA
+#  - NO_CUBLASLT: Default OFF, will skip building/linking CUBLASLT support
+#  - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
+#                  is whatever CMake finds on your path.
+#  - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
+#                        Separate by semicolons and quote the list for your shell, e.g. `-DCOMPUTE_CAPABILITY="89;90"`
+#                        Check your compute capability here: https://developer.nvidia.com/cuda-gpus
+#  - PTXAS_VERBOSE: Pass the `-v` option to the PTX Assembler
+cmake_minimum_required(VERSION 3.18)
+
+project(bitsandbytes LANGUAGES C CXX)
+# User-facing switches; set them at configure time with -D<NAME>=ON|OFF
+option(BUILD_CUDA "Build bitsandbytes with CUDA support" ON)
+option(NO_CUBLASLT "Disable CUBLASLT" OFF)
+option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)
+
+# Source lists are assigned with set() so a pre-existing cache/parent-scope value cannot leak in
+set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.c)
+set(CUDA_FILES csrc/ops.cu csrc/kernels.cu)
+set(SRC_FILES ${CPP_FILES}) # CUDA_FILES are appended further down only when BUILD_CUDA is ON
+
+message(STATUS "BUILD_CUDA := ${BUILD_CUDA}")
+message(STATUS "NO_CUBLASLT := ${NO_CUBLASLT}")
+set(BNB_OUTPUT_NAME "bitsandbytes") # Base name; a backend-specific suffix is appended further down
+
+if(BUILD_CUDA)
+    enable_language(CUDA) # This will fail if CUDA is not found
+
+    # Convert the CUDA version from X.Y.z to XY; the dot is escaped so only a literal `.` separator matches
+    string(REGEX MATCH "^[0-9]+\\.[0-9]+" _CUDA_VERSION_FIRST_TWO "${CMAKE_CUDA_COMPILER_VERSION}")
+    string(REPLACE "." "" CUDA_VERSION_SHORT "${_CUDA_VERSION_FIRST_TWO}")
+
+    # Expose a cache variable that the user can set to ensure the correct version of CUDA is found
+    set(CUDA_VERSION "${CUDA_VERSION_SHORT}" CACHE STRING "Expected CUDA Version Shortcode")
+
+    message(STATUS "CUDA Version: ${CUDA_VERSION_SHORT} (${CMAKE_CUDA_COMPILER_VERSION})")
+    message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
+
+    # It should match the discovered version
+    if(NOT CUDA_VERSION STREQUAL "${CUDA_VERSION_SHORT}")
+        message(FATAL_ERROR "You've specified CUDA version ${CUDA_VERSION} however the CUDA compiler found is ${CUDA_VERSION_SHORT}."
+            " Ensure the desired CUDA compiler is the first one available on your PATH.")
+    endif()
+
+    if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0")
+        message(FATAL_ERROR "CUDA Version < 11 is not supported")
+    elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
+        message(FATAL_ERROR "CUDA Version > 12 is not supported")
+    endif()
+
+    string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")
+    if(PTXAS_VERBOSE)
+        # Verbose? Outputs register usage information, and other things...
+        string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")
+    endif()
+
+    # CMAKE_CUDA_ARCHITECTURES_ALL is only provided by CMake >= 3.23; when it is unset this loop collects nothing
+    foreach(capability ${CMAKE_CUDA_ARCHITECTURES_ALL})
+        # Most of the items here are like: `xx-real`, so we just extract the `xx` portion
+        string(REGEX MATCH "[0-9]+" capability_id "${capability}")
+        if(capability_id GREATER 0)
+            list(APPEND POSSIBLE_CAPABILITIES ${capability_id})
+        endif()
+    endforeach()
+
+    # Defaults to every detected capability; can be changed via -D argument to CMake
+    set(COMPUTE_CAPABILITY "${POSSIBLE_CAPABILITIES}" CACHE STRING "Compute Capabilities Targeted")
+
+    message(STATUS "CUDA Capabilities Available: ${POSSIBLE_CAPABILITIES}")
+    message(STATUS "CUDA Capabilities  Selected: ${COMPUTE_CAPABILITY}")
+    if("${COMPUTE_CAPABILITY}" STREQUAL "")
+        message(WARNING "No compute capabilities selected, so no -gencode flags will be added. Pass -DCOMPUTE_CAPABILITY=... or use CMake >= 3.23.")
+    endif()
+
+    foreach(capability ${COMPUTE_CAPABILITY})
+        string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${capability},code=sm_${capability}")
+    endforeach()
+    message(STATUS "CUDA NVCC Flags: ${CMAKE_CUDA_FLAGS}")
+
+    list(APPEND SRC_FILES ${CUDA_FILES})
+    string(APPEND BNB_OUTPUT_NAME "_cuda${CUDA_VERSION_SHORT}")
+    if(NO_CUBLASLT)
+        string(APPEND BNB_OUTPUT_NAME "_nocublaslt")
+    endif()
+else()
+    message(STATUS "Building CPU Only")
+    string(APPEND BNB_OUTPUT_NAME "_cpu")
+    if(NO_CUBLASLT)
+        message(WARNING "We're building in CPU only mode but NO_CUBLASLT is enabled. It will have no effect.")
+    endif()
+endif()
+
+# CPP_FILES includes pythonInterface.c, so force every host source to be compiled as C++
+set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
+add_library(bitsandbytes SHARED ${SRC_FILES})
+# Target-scoped includes; the CUDA toolkit dirs are only populated once the CUDA language is enabled
+target_include_directories(bitsandbytes PUBLIC csrc include PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+target_compile_features(bitsandbytes PUBLIC cxx_std_14)
+
+if(BUILD_CUDA)
+    # Link through the imported CUDA:: targets from FindCUDAToolkit instead of bare library
+    # names, so resolution does not depend on the linker's default search path.
+    find_package(CUDAToolkit REQUIRED)
+    target_compile_definitions(bitsandbytes PUBLIC BUILD_CUDA)
+    target_link_libraries(bitsandbytes PUBLIC CUDA::cudart CUDA::cublas CUDA::cusparse)
+    if(NO_CUBLASLT)
+        target_compile_definitions(bitsandbytes PUBLIC NO_CUBLASLT)
+    else()
+        target_link_libraries(bitsandbytes PUBLIC CUDA::cublasLt)
+    endif()
+
+    set_target_properties(bitsandbytes PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(WIN32) # Force a `lib` filename prefix on Windows builds
+    set_property(TARGET bitsandbytes PROPERTY PREFIX "lib")
+endif()
+
+set_target_properties(bitsandbytes PROPERTIES
+    OUTPUT_NAME "${BNB_OUTPUT_NAME}"
+    # Output into <project>/bitsandbytes. PROJECT_SOURCE_DIR keeps this correct under add_subdirectory(),
+    # and the `$<1:...>` generator expression prevents MSVC Debug/Release subdirs being made.
+    RUNTIME_OUTPUT_DIRECTORY "$<1:${PROJECT_SOURCE_DIR}/bitsandbytes>"
+    LIBRARY_OUTPUT_DIRECTORY "$<1:${PROJECT_SOURCE_DIR}/bitsandbytes>"
+    POSITION_INDEPENDENT_CODE ON # The `-fPIC` commands for non-windows compilers
+    WINDOWS_EXPORT_ALL_SYMBOLS ON # On Windows, export all c methods as DLL exports
+)