Hi everyone,
I'm trying to install bitsandbytes on SageMaker but running into some issues. The instance has CUDA 12.4 installed, and I'm trying to install bitsandbytes with CUDA 12.1 compatibility (since it's more stable for bitsandbytes).
Steps are:
# 1. Removed existing installations
pip uninstall -y bitsandbytes
# 2. Set up CUDA environment
CUDA_HOME="/root/local/cuda-12.1"
LD_LIBRARY_PATH="/root/local/cuda-12.1/lib64:$LD_LIBRARY_PATH"
BNB_CUDA_VERSION="121"
# 3. Cloned and installed from source
git clone https://github.com/timdettmers/bitsandbytes.git
cd bitsandbytes
pip install -e .
The LD_LIBRARY_PATH seems correct:
/root/local/cuda-12.1/lib64:/lib/x86_64-linux-gnu:/opt/conda/lib:/usr/local/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/home/.openmpi/lib/
However, I'm getting errors that suggest the CUDA libraries aren't being found. The final error is:
ModuleNotFoundError: No module named 'bitsandbytes'
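As a sanity check, something like the following (standard library only, a minimal sketch rather than anything bitsandbytes-specific) shows whether the package is visible to the interpreter at all:

import importlib.util
import sys

# Which interpreter is running, and can it locate the bitsandbytes package?
print("Python executable:", sys.executable)
spec = importlib.util.find_spec("bitsandbytes")
print("bitsandbytes location:", spec.origin if spec else "not found on sys.path")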
Has anyone successfully installed bitsandbytes on SageMaker? Any suggestions on what I might be missing?
Thank you!
Full Code:
import subprocess
import os
import sys
from pathlib import Path
import re
import requests
import json
def run_command(command, description=None):
    """Execute a command and handle potential errors"""
    try:
        if description:
            print(f"\n{description}...")
        result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
        print(result.stdout)
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e.cmd}")
        print(f"Error output: {e.stderr}")
        return False
def cleanup_existing_installations():
    """Clean up existing PyTorch and CUDA packages"""
    packages_to_remove = [
        "torch",
        "torchvision",
        "torchaudio",
        "torchtext",
        "triton",
        "nvidia-cublas-cu12",
        "nvidia-cuda-cupti-cu12",
        "nvidia-cuda-nvrtc-cu12",
        "nvidia-cuda-runtime-cu12",
        "nvidia-cudnn-cu12",
        "nvidia-cufft-cu12",
        "nvidia-curand-cu12",
        "nvidia-cusolver-cu12",
        "nvidia-cusparse-cu12",
        "nvidia-nccl-cu12",
        "nvidia-nvtx-cu12"
    ]
    for package in packages_to_remove:
        run_command(f"pip uninstall -y {package}", f"Removing {package}")
    # Clear pip cache
    run_command("pip cache purge", "Clearing pip cache")
def get_cuda_version():
    """
    Detect CUDA version from nvidia-smi and system
    Returns tuple of (major_version, minor_version) or None if not found
    """
    try:
        # Try to get CUDA version from nvidia-smi
        nvidia_smi = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, text=True)
        if nvidia_smi.returncode == 0:
            # Extract CUDA version from nvidia-smi output
            match = re.search(r'CUDA Version: (\d+)\.(\d+)', nvidia_smi.stdout)
            if match:
                return (int(match.group(1)), int(match.group(2)))
        # If nvidia-smi fails or doesn't show CUDA version, try nvcc
        nvcc = subprocess.run(['nvcc', '--version'], stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, text=True)
        if nvcc.returncode == 0:
            match = re.search(r'release (\d+)\.(\d+)', nvcc.stdout)
            if match:
                return (int(match.group(1)), int(match.group(2)))
        # As a last resort, check the CUDA_HOME environment variable
        cuda_home = os.environ.get('CUDA_HOME')
        if cuda_home:
            match = re.search(r'cuda-(\d+)\.(\d+)', cuda_home)
            if match:
                return (int(match.group(1)), int(match.group(2)))
    except Exception as e:
        print(f"Error detecting CUDA version: {e}")
    return None
def get_compatible_cuda_version(cuda_version):
    """
    Map detected CUDA version to compatible version for installation
    Returns version code used in installation commands
    """
    if cuda_version is None:
        print("Could not detect CUDA version.")
        return None
    major, minor = cuda_version
    # For CUDA 12.x, always use 12.1 packages as they're stable and widely available
    if major == 12:
        print(f"Detected CUDA {major}.{minor}, using CUDA 12.1 packages for compatibility")
        return '121'
    # For older versions
    cuda_map = {
        (11, 8): '118',
        (11, 7): '117',
        (11, 6): '116'
    }
    # Find the closest compatible version
    for (comp_major, comp_minor), version_code in sorted(cuda_map.items(), reverse=True):
        if major > comp_major or (major == comp_major and minor >= comp_minor):
            return version_code
    print("Warning: Your CUDA version might be too old. Using oldest supported version.")
    return '116'
def get_compatible_torch_version(cuda_version_code):
    """
    Determine compatible PyTorch version based on CUDA version
    Returns tuple of (torch_version, is_stable)
    """
    # Map of CUDA versions to compatible PyTorch versions,
    # using a conservative mapping that's known to work
    cuda_torch_map = {
        '124': ['2.1.0'],   # CUDA 12.4 -> use 2.1.0 as it's stable
        '123': ['2.1.0'],   # CUDA 12.3
        '122': ['2.1.0'],   # CUDA 12.2
        '121': ['2.1.0'],   # CUDA 12.1
        '120': ['2.1.0'],   # CUDA 12.0
        '118': ['2.0.1'],   # CUDA 11.8
        '117': ['2.0.1'],   # CUDA 11.7
        '116': ['1.13.1'],  # CUDA 11.6
        '115': ['1.13.1'],  # CUDA 11.5
        '113': ['1.13.1']   # CUDA 11.3
    }
    try:
        # Try to get available PyTorch versions from PyPI
        response = requests.get("https://pypi.org/pypi/torch/json")
        if response.status_code == 200:
            available_versions = set(json.loads(response.text)['releases'].keys())
            # Filter compatible versions that are actually available
            compatible_versions = [v for v in cuda_torch_map.get(cuda_version_code, [])
                                   if v in available_versions]
            if compatible_versions:
                return compatible_versions[0], True
    except Exception as e:
        print(f"Warning: Could not fetch PyTorch versions from PyPI: {e}")
    # Fall back to the default version for the detected CUDA version
    default_version = cuda_torch_map.get(cuda_version_code, ['2.1.0'])[0]
    return default_version, False
def install_dependencies(cuda_version_code):
    """Install PyTorch and required CUDA dependencies"""
    # Get compatible PyTorch version
    torch_version, is_stable = get_compatible_torch_version(cuda_version_code)
    print(f"Installing PyTorch version {torch_version} for CUDA {cuda_version_code}")
    # Install the PyTorch ecosystem with CUDA support - this pulls in the CUDA dependencies
    torch_command = (
        f"pip install --no-cache-dir "
        f"torch=={torch_version}+cu{cuda_version_code} "
        f"torchvision "
        f"torchaudio "
        f"--index-url https://download.pytorch.org/whl/cu{cuda_version_code}"
    )
    if not run_command(torch_command, "Installing PyTorch ecosystem"):
        return False
    # Install other dependencies
    other_packages = [
        "transformers==4.44.2",
        "peft==0.12.0",
        "accelerate==0.33.0",
        "trl==0.9.6",
        "timm"
    ]
    for package in other_packages:
        if not run_command(f"pip install --no-cache-dir {package}", f"Installing {package}"):
            return False
    return True
def verify_cuda_installation():
    """Verify CUDA and cuDNN installation"""
    try:
        import torch
        print(f"PyTorch version: {torch.__version__}")
        print(f"CUDA available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f"CUDA version: {torch.version.cuda}")
            print(f"cuDNN version: {torch.backends.cudnn.version()}")
            print(f"GPU device name: {torch.cuda.get_device_name(0)}")
        return True
    except Exception as e:
        print(f"Error verifying CUDA installation: {e}")
        return False
def verify_pytorch():
    """Verify PyTorch installation and CUDA availability"""
    verify_cmd = """python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available()); print('CUDA version:', torch.version.cuda if torch.cuda.is_available() else 'N/A')" """
    return run_command(verify_cmd, "Verifying PyTorch installation")
def install_cuda(cuda_version_code):
    """Install CUDA using local installation script"""
    home_dir = str(Path.home())
    cuda_install_path = f"{home_dir}/local/cuda-{cuda_version_code[:2]}.{cuda_version_code[2]}"
    # Check that install_cuda.sh was shipped alongside the training code
    cuda_install_script_path = "/opt/ml/model/code/install_cuda.sh"
    if not os.path.exists(cuda_install_script_path):
        print(f"Error: install_cuda.sh not found at {cuda_install_script_path}")
        print("Please ensure install_cuda.sh is deployed alongside this script")
        return False
    # Install CUDA with the provided version
    if not run_command(
        f"bash {cuda_install_script_path} {cuda_version_code} {cuda_install_path} 1",
        f"Installing CUDA {cuda_version_code}"
    ):
        return False
    return True
def install_bitsandbytes(cuda_version_code):
    """Clone and install bitsandbytes with proper CUDA support"""
    build_dir = "/tmp/bitsandbytes_build"
    # First uninstall any existing bitsandbytes
    run_command("pip uninstall -y bitsandbytes", "Removing existing bitsandbytes")
    # Clean any existing build directory
    if os.path.exists(build_dir):
        run_command(f"rm -rf {build_dir}", "Cleaning up existing build directory")
    home_dir = str(Path.home())
    cuda_home = str(Path(home_dir) / "local" / f"cuda-{cuda_version_code[:2]}.{cuda_version_code[2]}")
    cuda_lib = str(Path(cuda_home) / "lib64")
    ld_library_path = f"{cuda_lib}:{os.environ.get('LD_LIBRARY_PATH', '')}"
    global_paths = {
        "CUDA_HOME": cuda_home,
        "LD_LIBRARY_PATH": ld_library_path,
        "BNB_CUDA_VERSION": cuda_version_code
    }
    # Clone the repository to /tmp
    if not run_command(
        f"git clone https://github.com/timdettmers/bitsandbytes.git {build_dir}",
        "Cloning bitsandbytes repository"
    ):
        return False, global_paths
    original_dir = os.getcwd()
    try:
        os.chdir(build_dir)
        # Set environment variables so the build picks up the local CUDA toolkit
        os.environ.update(global_paths)
        print("Environment variables set:")
        print(f"CUDA_HOME: {os.environ['CUDA_HOME']}")
        print(f"LD_LIBRARY_PATH: {os.environ['LD_LIBRARY_PATH']}")
        print(f"BNB_CUDA_VERSION: {os.environ['BNB_CUDA_VERSION']}")
        # Install in editable mode from the cloned source
        if not run_command("pip install -e .", "Installing bitsandbytes"):
            return False, global_paths
        # Also try to install into the conda prefix that SageMaker's Python uses
        if not run_command(
            "pip install -e . --prefix=/opt/conda",
            "Installing bitsandbytes to conda environment"
        ):
            return False, global_paths
        verify_cuda_setup()
        return True, global_paths
    except Exception as e:
        print(f"Error during bitsandbytes installation: {str(e)}")
        return False, global_paths
    finally:
        os.chdir(original_dir)
        run_command(f"rm -rf {build_dir}", "Cleaning up build directory")
def verify_cuda_setup():
    """Verify CUDA setup and print diagnostic information"""
    cuda_home = os.environ.get('CUDA_HOME', '/usr/local/cuda')
    cuda_paths = [
        str(Path(cuda_home) / "lib64" / "libcudart.so"),
        str(Path(cuda_home) / "lib64" / "libcuda.so")
    ]
    print("\nCUDA Setup Verification:")
    print(f"CUDA_HOME: {os.environ.get('CUDA_HOME', 'Not set')}")
    print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'Not set')}")
    for path in cuda_paths:
        if os.path.exists(path):
            print(f"Found: {path}")
        else:
            print(f"Missing: {path}")
def try_pip_install_bitsandbytes(cuda_version_code):
    """Try to install bitsandbytes via pip as a fallback"""
    major_version = int(cuda_version_code[:2])
    if major_version >= 12:
        # Quote the specifier so the shell doesn't treat ">=" as a redirect
        return run_command(
            'pip install --no-cache-dir "bitsandbytes>=0.41.1"',
            "Installing bitsandbytes via pip"
        )
    return False
def verify_installation():
    """Verify bitsandbytes installation"""
    verify_cmd = (
        "python -c \"import sys; print('Python path:', sys.path); "
        "import bitsandbytes; print('BitsAndBytes version:', bitsandbytes.__version__); "
        "print('BitsAndBytes path:', bitsandbytes.__file__)\""
    )
    return run_command(verify_cmd, "Verifying bitsandbytes installation")
def save_environment_config(cuda_home=None, ld_library_path=None, bnb_cuda_version=None):
    """Save environment configuration to JSON using actual installed paths
    Args:
        cuda_home (str): Path to CUDA installation
        ld_library_path (str): Library path for CUDA
        bnb_cuda_version (str): CUDA version for bitsandbytes
    """
    # Get values from the current environment if not provided
    if cuda_home is None:
        cuda_home = os.environ.get('CUDA_HOME', '/root/local/cuda-12.1')
    if ld_library_path is None:
        ld_library_path = os.environ.get('LD_LIBRARY_PATH', f"{cuda_home}/lib64:/usr/local/cuda/lib64")
    if bnb_cuda_version is None:
        bnb_cuda_version = os.environ.get('BNB_CUDA_VERSION', '121')
    config = {
        "CUDA_HOME": cuda_home,
        "LD_LIBRARY_PATH": ld_library_path,
        "BNB_CUDA_VERSION": bnb_cuda_version
    }
    config_path = "/opt/ml/model/code/env_config.json"
    with open(config_path, "w") as f:
        json.dump(config, f)
    print(f"Saved environment config to {config_path}: {config}")
def setup_install_bnb():
    print("Starting bitsandbytes installation process...")
    # Detect CUDA version
    cuda_version = get_cuda_version()
    if cuda_version:
        print(f"Detected CUDA version: {cuda_version[0]}.{cuda_version[1]}")
    else:
        print("Could not detect CUDA version")
        sys.exit(1)
    # Get compatible version code for installation
    cuda_version_code = get_compatible_cuda_version(cuda_version)
    if not cuda_version_code:
        sys.exit(1)
    print(f"Using CUDA version code {cuda_version_code} for installation")
    installation_paths = None  # populated by the bitsandbytes step
    steps = [
        (lambda: install_dependencies(cuda_version_code), "Installing dependencies"),
        (lambda: install_cuda(cuda_version_code), "Installing CUDA"),
        (lambda: install_bitsandbytes(cuda_version_code), "Installing bitsandbytes"),
        (verify_installation, "Verifying installation")
    ]
    for step_func, description in steps:
        print(f"\n=== {description} ===")
        result = step_func()
        if description == "Installing bitsandbytes":
            success, paths = result
            if success:
                installation_paths = paths
            else:
                print("\nAttempting fallback pip installation...")
                if try_pip_install_bitsandbytes(cuda_version_code):
                    print("Fallback installation successful!")
                    continue
                print(f"\nError during {description}. Installation failed.")
                sys.exit(1)
        elif not result:
            print(f"\nError during {description}. Installation failed.")
            sys.exit(1)
    if installation_paths:
        # Save the environment configuration with the actual paths from the installation step
        # (map the uppercase env-var keys onto the function's keyword arguments)
        save_environment_config(
            cuda_home=installation_paths["CUDA_HOME"],
            ld_library_path=installation_paths["LD_LIBRARY_PATH"],
            bnb_cuda_version=installation_paths["BNB_CUDA_VERSION"]
        )
    else:
        print("Warning: No installation paths captured, using default values")
        save_environment_config()
    verify_bitsandbytes_installation()
    print("\nInstallation completed successfully!")
def verify_bitsandbytes_installation():
    """Search for libbitsandbytes_cuda121.so and update the environment."""
    file_name = "libbitsandbytes_cuda121.so"
    search_paths = ["/usr/local/lib", "/usr/lib", "/opt/conda/lib"]
    for base_path in search_paths:
        base_path = Path(base_path)
        if base_path.exists():
            # Look for the file in the directory tree
            matching_files = list(base_path.rglob(file_name))
            if matching_files:
                bnb_path = matching_files[0].parent  # Parent directory of the library
                print(f"Found {file_name} in: {bnb_path}")
                os.environ["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "") + f":{bnb_path}"
                print(f"Updated LD_LIBRARY_PATH: {os.environ['LD_LIBRARY_PATH']}")
                return
    print(f"Error: {file_name} not found in any of the search paths. Ensure bitsandbytes is correctly installed.")
    sys.exit(1)


if __name__ == "__main__":
    setup_install_bnb()
Solved the issue by adding this to my Python script before starting the bitsandbytes install:
os.environ['CUDA_HOME'] = '/usr/local/cuda'
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda/lib64'
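In script form, the working sequence looks roughly like this (a minimal sketch; /usr/local/cuda is the toolkit path on my instance, and the final pip command could equally be the source install from the clone):

import os
import subprocess

# Point the build at the CUDA toolkit bundled with the SageMaker image.
# These are the paths from my instance; treat them as assumptions elsewhere.
os.environ['CUDA_HOME'] = '/usr/local/cuda'
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda/lib64:' + os.environ.get('LD_LIBRARY_PATH', '')

# Install bitsandbytes only after the variables are set, so the pip
# subprocess inherits CUDA_HOME and LD_LIBRARY_PATH.
subprocess.run('pip install bitsandbytes', shell=True, check=True)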