You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm trying to install bitsandbytes on Amazon SageMaker with CUDA 12.4. The installation process hangs during the make step. Here are the details:
Environment:
Platform: Amazon SageMaker
CUDA Version: 12.4.131
GCC Version: GNU 11.4.0
CMake: Successfully configured
Installation Steps Being Executed:
Installing build prerequisites (build-essential, cmake)
Removing any existing bitsandbytes installation
Cloning bitsandbytes repository to /tmp/bitsandbytes_build
Configuring with CMake using -DCOMPUTE_BACKEND=cuda
Attempting to build with make VERBOSE=1
Current Status:
CMake configuration completed successfully with the following capabilities:
CUDA Capabilities Available: 50;52;53;60;61;62;70;72;75;80;86;87;89;90
CUDA NVCC Flags: --use_fast_math
Build files successfully written to /tmp/bitsandbytes_build
Installation hangs at the "Building with Make..." step
No error messages are displayed; the process simply stops responding
Issue:
The installation process successfully completes the CMake configuration but appears to hang indefinitely during the make process. The last log entry shows "Building with Make..." with no further output or error messages.
Questions:
Are there known issues with building on SageMaker with CUDA 12.4?
Are there any specific make flags or configurations needed for SageMaker environments?
Is there a way to get more verbose output during the make process to identify where it's hanging?
Could you provide any guidance on how to proceed or what additional information would be helpful for debugging this issue?
Thank you!
The full code:
import subprocess
import os
import sys
from pathlib import Path
import re
def run_command(command, description=None, timeout=300):  # 5 minutes timeout
    """Run a shell command, print its output, and report success as a bool.

    The command's stdout and stderr are captured and printed only once the
    command has finished (or timed out), so a long-running command prints
    nothing while it is still working.

    Args:
        command: Command line handed to the shell (``shell=True``).
        description: Optional label printed before the command is run.
        timeout: Seconds to wait before giving up on the command.

    Returns:
        True if the command exited with status 0. False on a non-zero exit
        status, a timeout, or any other error; no exception is propagated.
    """

    def _as_text(stream):
        # Partial output attached to TimeoutExpired may be bytes even though
        # text=True was requested, so normalise it before printing.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode(errors="replace")
        return stream

    try:
        if description:
            print(f"\n{description}...")
        print(f"Executing command: {command}")  # Print the actual command
        result = subprocess.run(
            command,
            shell=True,
            check=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        print(result.stdout)
        if result.stderr:
            # Many tools write warnings to stderr even when they succeed;
            # surface them instead of silently discarding them.
            print(f"Warnings/stderr output: {result.stderr}")
        return True
    except subprocess.TimeoutExpired as e:
        print(f"Command timed out after {timeout} seconds")
        # Show whatever was produced before the timeout so the caller can
        # see how far the command got.
        partial_stdout = _as_text(e.stdout)
        partial_stderr = _as_text(e.stderr)
        if partial_stdout:
            print(f"Output before timeout: {partial_stdout}")
        if partial_stderr:
            print(f"Error output before timeout: {partial_stderr}")
        return False
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e.cmd}")
        if e.stdout:
            # Build tools often report the failing step on stdout.
            print(f"Standard output: {e.stdout}")
        print(f"Error output: {e.stderr}")
        return False
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return False
def install_bitsandbytes():
    """Build bitsandbytes from source with the CUDA backend and install it.

    Steps, in order: install build prerequisites with apt-get, uninstall any
    existing bitsandbytes, clone the repository into
    ``/tmp/bitsandbytes_build``, configure with CMake, build with make, and
    finally run ``pip install -e .`` inside the clone.

    The build directory is removed only when the installation did not
    succeed. An editable install keeps referring to the source tree, so
    deleting the tree after a successful install would break the package.

    Returns:
        True if every required step succeeded, False otherwise.
    """
    build_dir = "/tmp/bitsandbytes_build"
    # Compiling the CUDA kernels for every configured compute capability can
    # take longer than run_command's default 5-minute timeout, so the build
    # and install steps get a more generous limit.
    build_timeout = 3600
    installed = False
    try:
        # 1. Install build prerequisites
        if not run_command(
            "apt-get install -y build-essential cmake",
            "Installing build prerequisites",
        ):
            return False

        # 2. Remove any existing installation (best effort; result ignored)
        run_command("pip uninstall -y bitsandbytes", "Removing existing bitsandbytes")

        # Clean build directory
        if os.path.exists(build_dir):
            run_command(f"rm -rf {build_dir}", "Cleaning up existing build directory")

        # 3. Clone repository
        if not run_command(
            f"git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git {build_dir}",
            "Cloning bitsandbytes repository",
        ):
            return False

        original_dir = os.getcwd()
        os.chdir(build_dir)
        try:
            # 4. Configure using CMake
            if not run_command(
                "cmake -DCOMPUTE_BACKEND=cuda -S .", "Configuring with CMake"
            ):
                return False

            # 5. Make with verbose output
            if not run_command(
                "make VERBOSE=1", "Building with Make", timeout=build_timeout
            ):
                # If verbose make fails, try regular make
                print("Retrying with regular make...")
                if not run_command(
                    "make", "Building with Make (retry)", timeout=build_timeout
                ):
                    return False

            # 6. Install the package (editable: it stays linked to build_dir)
            if not run_command(
                "pip install -e .", "Installing bitsandbytes", timeout=build_timeout
            ):
                return False

            installed = True
            return True
        finally:
            # Always restore the caller's working directory.
            os.chdir(original_dir)
    except Exception as e:
        print(f"Error during bitsandbytes installation: {str(e)}")
        return False
    finally:
        # Keep the source tree after a successful editable install; remove
        # it only when the installation failed part-way through.
        if not installed and os.path.exists(build_dir):
            run_command(f"rm -rf {build_dir}", "Cleaning up build directory")
# Add this new function to check build environment
def check_build_environment():
    """Run each build tool's version/status command and print the result.

    For every tool the command is run through the shell. Its stdout is
    printed when the exit status is 0, and its stderr is printed otherwise.
    Any exception raised while checking a tool is printed and the loop moves
    on to the next tool.
    """
    probes = {
        "CMake": "cmake --version",
        "GCC": "gcc --version",
        "NVIDIA CUDA Compiler": "nvcc --version",
        "NVIDIA System Management Interface": "nvidia-smi",
    }

    print("\nChecking build environment:")
    for label, probe in probes.items():
        print(f"\nChecking {label}...")
        try:
            outcome = subprocess.run(
                probe, shell=True, capture_output=True, text=True
            )
            if outcome.returncode != 0:
                print(f"{label} check failed:")
                print(outcome.stderr.strip())
                continue
            print(f"{label} found:")
            print(outcome.stdout.strip())
        except Exception as e:
            print(f"Error checking {label}: {str(e)}")
def get_cuda_version():
    """Parse the "CUDA Version: X.Y" field from ``nvidia-smi`` output.

    Returns:
        A ``(major, minor)`` tuple of ints, or None when nvidia-smi exits
        with a non-zero status, its output has no such field, or running it
        raises (in which case the error is printed).
    """
    try:
        smi = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
        found = None
        if smi.returncode == 0:
            found = re.search(r'CUDA Version: (\d+)\.(\d+)', smi.stdout)
        if found:
            major, minor = found.groups()
            return (int(major), int(minor))
    except Exception as e:
        print(f"Error detecting CUDA version: {e}")
    return None
def install_bnb_packages():
    """Run the full flow: environment check, install, then verification.

    Prints the detected CUDA version (or a warning when none is detected),
    installs bitsandbytes, and verifies the result. The process exits with
    status 1 as soon as the install or the import verification reports
    failure.
    """
    # Report on the build tools before doing any work.
    check_build_environment()

    # Report the CUDA version; a missing version is only a warning.
    cuda_version = get_cuda_version()
    if not cuda_version:
        print("Warning: Could not detect CUDA version")
    else:
        major, minor = cuda_version
        print(f"Detected CUDA version: {major}.{minor}")

    # Install, then verify; stop at the first step that reports failure.
    print("\nInstalling bitsandbytes...")
    required_steps = (
        (install_bitsandbytes, "Failed to install bitsandbytes"),
        (verify_installation, "Bitsandbytes installation verification failed"),
    )
    for step, failure_message in required_steps:
        if not step():
            print(failure_message)
            sys.exit(1)

    verify_bitsandbytes_installation()
    print("\nInstallation completed successfully!")
def verify_installation():
    """Import bitsandbytes in a child ``python`` process and print its details.

    Returns:
        The bool result of ``run_command`` for the generated one-liner.
    """
    script = "; ".join(
        (
            "import bitsandbytes as bnb",
            "print('BitsAndBytes version:', bnb.__version__)",
            "print('CUDA available:', bnb.CUDA_AVAILABLE)",
            "print('BitsAndBytes path:', bnb.__file__)",
        )
    )
    return run_command(
        f'python -c "{script}"', "Verifying bitsandbytes installation"
    )
def verify_bitsandbytes_installation(file_pattern="libbitsandbytes_cuda*.so", search_paths=None):
    """Find the compiled bitsandbytes CUDA library and add its directory to LD_LIBRARY_PATH.

    The library is matched with a glob rather than a fixed
    ``libbitsandbytes_cuda121.so`` name, because the file name carries the
    CUDA version the library was built for.

    Args:
        file_pattern: Glob pattern for the shared library's file name.
        search_paths: Directories to search recursively. When None, the
            directory of the importable ``bitsandbytes`` package (if any) is
            searched first, followed by ``/usr/local/lib``, ``/usr/lib`` and
            ``/opt/conda/lib``.

    Raises:
        SystemExit: with status 1 when no matching library is found.
    """
    import importlib.util

    if search_paths is None:
        search_paths = ["/usr/local/lib", "/usr/lib", "/opt/conda/lib"]
        # An editable or site-packages install keeps the library inside the
        # package directory, which may be outside the default system paths.
        try:
            spec = importlib.util.find_spec("bitsandbytes")
        except (ImportError, ValueError):
            spec = None
        if spec is not None and spec.origin:
            search_paths.insert(0, Path(spec.origin).parent)

    for base_path in search_paths:
        base_path = Path(base_path)
        if not base_path.exists():
            continue
        # Look for the file in the directory tree; sort for a stable choice.
        matching_files = sorted(base_path.rglob(file_pattern))
        if not matching_files:
            continue
        library = matching_files[0]
        bnb_path = str(library.parent)
        print(f"Found {library.name} in: {bnb_path}")
        existing = os.environ.get("LD_LIBRARY_PATH", "")
        # Avoid a leading ':' (an empty path entry) when the variable was
        # previously unset or empty.
        os.environ["LD_LIBRARY_PATH"] = (
            f"{existing}:{bnb_path}" if existing else bnb_path
        )
        print(f"Updated LD_LIBRARY_PATH: {os.environ['LD_LIBRARY_PATH']}")
        return

    print(f"Error: {file_pattern} not found in any of the search paths. Ensure bitsandbytes is correctly installed.")
    sys.exit(1)
# Script entry point: run the full check/install/verify flow only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    install_bnb_packages()
The text was updated successfully, but these errors were encountered:
Solved the issue by adding the following two lines to my Python script before installing bitsandbytes:
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda/lib64' and os.environ['CUDA_HOME'] = '/usr/local/cuda'
Hi everyone,
I'm trying to install bitsandbytes on Amazon SageMaker with CUDA 12.4. The installation process hangs during the make step. Here are the details:
Environment:
Installation Steps Being Executed:
Current Status:
CMake configuration completed successfully with the following capabilities:
CUDA Capabilities Available: 50;52;53;60;61;62;70;72;75;80;86;87;89;90
CUDA NVCC Flags: --use_fast_math
Build files successfully written to /tmp/bitsandbytes_build
Installation hangs at the "Building with Make..." step
No error messages are displayed; the process simply stops responding
Issue:
The installation process successfully completes the CMake configuration but appears to hang indefinitely during the make process. The last log entry shows "Building with Make..." with no further output or error messages.
Questions:
Are there known issues with building on SageMaker with CUDA 12.4?
Are there any specific make flags or configurations needed for SageMaker environments?
Is there a way to get more verbose output during the make process to identify where it's hanging?
Could you provide any guidance on how to proceed or what additional information would be helpful for debugging this issue?
Thank you!
The full code:
The text was updated successfully, but these errors were encountered: