Dear all,

Thank you so much for sharing the Llama 3.2 vision model fine-tuning script so fast!

I got the following error when running the demo:
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 11.36it/s]
trainable params: 31,416,320 || all params: 10,674,357,795 || trainable%: 0.2943
/home/jma/anaconda3/lib/python3.12/site-packages/torch/cuda/__init__.py:128: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at /opt/conda/conda-bld/pytorch_1720538439675/work/c10/cuda/CUDAFunctions.cpp:108.)
return torch._C._cuda_getDeviceCount() > 0
/home/jma/anaconda3/lib/python3.12/site-packages/transformers/optimization.py:591: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
0%| | 0/2144 [00:00<?, ?it/s]Traceback (most recent call last):
File "/home/jma/Documents/llama3/finetune_demo.py", line 86, in <module>
trainer.train()
File "/home/jma/anaconda3/lib/python3.12/site-packages/transformers/trainer.py", line 2043, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/home/jma/anaconda3/lib/python3.12/site-packages/transformers/trainer.py", line 2345, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home/jma/anaconda3/lib/python3.12/site-packages/accelerate/data_loader.py", line 550, in __iter__
current_batch = next(dataloader_iter)
^^^^^^^^^^^^^^^^^^^^^
File "/home/jma/anaconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 630, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/home/jma/anaconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 673, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jma/anaconda3/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/home/jma/Documents/llama3/finetune_demo.py", line 47, in process
batch = processor(text=texts, images=images, return_tensors="pt", padding=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jma/anaconda3/lib/python3.12/site-packages/transformers/models/mllama/processing_mllama.py", line 308, in __call__
raise ValueError(
ValueError: The number of images in each batch [1, 1] should be the same [1, 1] should be the same. Yes, the model does not support having a different number of images per batch.
0%| | 0/2144 [00:00<?, ?it/s]
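One thing I noticed in the log: the CUDA initialization warning suggests the script may effectively be running without a visible GPU on this machine, which could matter for `device_map="auto"` and the `.to("cuda")` call in the collator shown below. A quick sanity check (my own addition, not part of the demo) would be:

import torch

# Environment check: the UserWarning above says CUDA failed to initialize,
# so device_map="auto" and .to("cuda") may not behave as expected here.
print(torch.cuda.is_available())
print(torch.cuda.device_count())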
I simply copied the code into a Python script:
from transformers import MllamaForConditionalGeneration, AutoProcessor, BitsAndBytesConfig, MllamaProcessor
from peft import LoraConfig, get_peft_model
import torch
from transformers import Trainer
from datasets import load_dataset
from transformers import TrainingArguments

ds = load_dataset("merve/vqav2-small", split="validation[:10%]")

ckpt = "meta-llama/Llama-3.2-11B-Vision"

USE_LORA = True
# if full fine-tune, you can opt to freeze image part
if USE_LORA:
    lora_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules=['down_proj', 'o_proj', 'k_proj', 'q_proj', 'gate_proj', 'up_proj', 'v_proj'],
        use_dora=True,  # optional DoRA
        init_lora_weights="gaussian"
    )

    model = MllamaForConditionalGeneration.from_pretrained(
        ckpt,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
else:
    model = MllamaForConditionalGeneration.from_pretrained(
        ckpt, torch_dtype=torch.bfloat16, device_map="auto"
    )

    # freeze vision model to save up on compute
    for param in model.vision_model.parameters():
        param.requires_grad = False

processor = AutoProcessor.from_pretrained(ckpt)

def process(examples):
    texts = [
        f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n<|image|>{example['question']} Answer briefly. <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{example['multiple_choice_answer']}<|eot_id|>"
        for example in examples
    ]
    images = [[example["image"].convert("RGB")] for example in examples]

    batch = processor(text=texts, images=images, return_tensors="pt", padding=True)

    labels = batch["input_ids"].clone()
    labels[labels == processor.tokenizer.pad_token_id] = -100
    labels[labels == 128256] = -100  # image token index
    batch["labels"] = labels
    batch = batch.to(torch.bfloat16).to("cuda")

    return batch

args = TrainingArguments(
    num_train_epochs=2,
    remove_unused_columns=False,
    per_device_train_batch_size=1,
    # gradient_accumulation_steps=4,
    # warmup_steps=2,
    learning_rate=2e-5,
    weight_decay=1e-6,
    adam_beta2=0.999,
    logging_steps=250,
    save_strategy="no",
    optim="adamw_hf",
    push_to_hub=True,
    save_total_limit=1,
    bf16=True,
    output_dir="./lora",
    dataloader_pin_memory=False,
    gradient_checkpointing=True
)

trainer = Trainer(
    model=model,
    train_dataset=ds,
    data_collator=process,
    args=args
)

trainer.train()
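In case it helps with debugging, here is a minimal check of my own (not part of the copied demo) that calls the collator directly on two dataset rows, the same way the failing DataLoader step does in the traceback above:

# Minimal sketch: feed two rows straight to the collator, bypassing Trainer.
# If the collator/processor interaction is the culprit, this should raise the
# same ValueError from processing_mllama.py.
samples = [ds[0], ds[1]]
batch = process(samples)
print(batch["input_ids"].shape)  # reached only if the processor call succeeds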
Any comments on solving this issue are highly appreciated.
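P.S. The AdamW FutureWarning in the log looks unrelated to the crash; if I read it correctly, it comes from `optim="adamw_hf"` in the TrainingArguments, and switching to the PyTorch implementation should silence it, e.g. (only the optimizer choice changed, everything else as in the script above):

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./lora",
    optim="adamw_torch",  # torch.optim.AdamW instead of the deprecated HF AdamW
    bf16=True,
)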