From 32605b48c7b86bf04a467eb6b94d5ce7fbf8f9ea Mon Sep 17 00:00:00 2001
From: Guillaume Raille
Date: Thu, 1 Aug 2024 13:14:07 +0200
Subject: [PATCH 1/2] patch check_nvidia to support CUDA_VISIBLE_DEVICES

---
 unsloth/models/_utils.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index fe3aa904..79791b3f 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -933,15 +933,25 @@ def patch_llama_rope_scaling(
 pass
 
 
 def check_nvidia():
-    # Unsloth doesn't work yet on AMD devices - we're working on it!
+    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
+    output = np.array([0,])
     try:
-        output = subprocess.check_output("nvidia-smi --query-gpu=memory.used --format=csv", shell = True)
-        output = re.findall(rb'([\d]{1,})[\s]{1,}M', output)
-        output = np.array([int(x.decode('utf-8'))/1024 for x in output])
-    except:
+        if cuda_visible_devices is not None:
+            gpu_ids = cuda_visible_devices.split(',')
+            query_gpus = ','.join(gpu_ids)
+            command = f"nvidia-smi --query-gpu=index,memory.used --format=csv -i {query_gpus}"
+        else:
+            command = "nvidia-smi --query-gpu=index,memory.used --format=csv"
+
+        output = subprocess.check_output(command, shell=True)
+        output = re.findall(rb'(\d+),\s*(\d+)\s*MiB', output)
+        output = np.array([int(x[1].decode('utf-8'))/1024 for x in output])
+    except subprocess.CalledProcessError:
         if not torch.cuda.is_available():
-            raise RuntimeError("Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!")
+            raise RuntimeError("Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!")
+        raise
+
     return output
 pass
 PRE_CHECK = check_nvidia()

From 282011163ae9d18b2ce8fe33a3f47f3cf3fa5574 Mon Sep 17 00:00:00 2001
From: Guillaume Raille
Date: Thu, 1 Aug 2024 13:51:58 +0200
Subject: [PATCH 2/2] remove try/except to surface the real error

---
 unsloth/models/llama.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index e6c9280b..9b742560 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -1410,14 +1410,14 @@ def from_pretrained(
 
         # Patch Trainer
         from transformers.trainer import Trainer
-        try:
-            if Trainer._inner_training_loop.__name__ != "_fast_inner_training_loop":
-                inner_training_loop = inspect.getsource(Trainer._inner_training_loop)
-                Trainer._original_training_loop = inner_training_loop
-            else:
-                inner_training_loop = Trainer._original_training_loop
-        except:
-            raise RuntimeError('Unsloth currently does not support multi GPU setups - but we are working on it!')
+        # try:
+        if Trainer._inner_training_loop.__name__ != "_fast_inner_training_loop":
+            inner_training_loop = inspect.getsource(Trainer._inner_training_loop)
+            Trainer._original_training_loop = inner_training_loop
+        else:
+            inner_training_loop = Trainer._original_training_loop
+        # except:
+        #     raise RuntimeError('Unsloth currently does not support multi GPU setups - but we are working on it!')
         pass
 
         if ((post_check - pre_check) >= 1).sum() > 1:
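
A quick way to sanity-check the parsing logic from PATCH 1/2 outside of Unsloth is the standalone sketch below. The helper name gpu_memory_used_gib and the script framing are hypothetical, not part of the patch; it assumes nvidia-smi is on PATH and emits CSV rows of the form "0, 1234 MiB". Since ','.join(s.split(',')) is an identity transform on the raw string, the sketch passes CUDA_VISIBLE_DEVICES straight to nvidia-smi's -i flag, which accepts the same comma-separated ids (indices or GPU UUIDs).

import os
import re
import subprocess

import numpy as np


def gpu_memory_used_gib():
    # Query only the devices listed in CUDA_VISIBLE_DEVICES, if set.
    visible = os.environ.get("CUDA_VISIBLE_DEVICES")
    command = "nvidia-smi --query-gpu=index,memory.used --format=csv"
    if visible is not None:
        command += f" -i {visible}"
    output = subprocess.check_output(command, shell=True)
    # Rows look like b"0, 1234 MiB"; keep the MiB figure for each GPU
    # and convert it to GiB, matching the patched check_nvidia().
    rows = re.findall(rb"(\d+),\s*(\d+)\s*MiB", output)
    return np.array([int(mem) / 1024 for _, mem in rows])


if __name__ == "__main__":
    # e.g.  CUDA_VISIBLE_DEVICES=0,2 python check_gpus.py
    print(gpu_memory_used_gib())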
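
PATCH 2/2 keeps the name-based guard intact: the surrounding code presumably installs Unsloth's fast loop under the name _fast_inner_training_loop, so comparing __name__ reveals whether Trainer was already patched, and the cached source is reused instead of calling inspect.getsource again. With the blanket except gone, a failure inside inspect.getsource now propagates as-is instead of being masked by the generic multi-GPU RuntimeError. A stripped-down sketch of that patch-once pattern, run as a script, with a dummy Trainer standing in for transformers.trainer.Trainer:

import inspect


class Trainer:  # dummy stand-in for transformers.trainer.Trainer
    def _inner_training_loop(self):
        return "original loop"


def install_fast_loop(cls):
    # Patch-once guard: only snapshot the source while the stock method
    # is still in place (its __name__ changes after patching).
    if cls._inner_training_loop.__name__ != "_fast_inner_training_loop":
        cls._original_training_loop = inspect.getsource(cls._inner_training_loop)

    def _fast_inner_training_loop(self):
        return "fast loop"

    cls._inner_training_loop = _fast_inner_training_loop


install_fast_loop(Trainer)
install_fast_loop(Trainer)  # guard trips: cached source kept, no re-inspection
print(Trainer()._inner_training_loop())  # -> fast loop
print(Trainer._original_training_loop)   # -> source of the original method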