From 10786e3c3b844c1138d6a702fc6c8ba1d376c7bb Mon Sep 17 00:00:00 2001
From: itay hubara
Date: Fri, 5 Apr 2024 19:46:58 +0300
Subject: [PATCH] fixing DS yaml by adding gradient clipping: 0.3, and small update to README (#726)

---
 llama2_70b_lora/README.md                   | 25 ++++++++++++---------
 llama2_70b_lora/configs/default_config.yaml |  1 +
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llama2_70b_lora/README.md b/llama2_70b_lora/README.md
index 2caddad2f..bd1d908fe 100644
--- a/llama2_70b_lora/README.md
+++ b/llama2_70b_lora/README.md
@@ -16,7 +16,7 @@ pip install -r requirements.txt
 
 You will also need to run the following to install flash attention:
 ```
-pip install flash-attn --no-build-isolation
+pip install flash-attn==2.1.0 --no-build-isolation
 ```
 
 > For flash attention, make sure that the following command returns 0:
@@ -52,27 +52,30 @@ As defaults the scripts assume the model is under at ```./llama-v2-fused-qkv```
 Run:
 ```bash
 accelerate launch --config_file configs/default_config.yaml scripts/train.py \
---model_name meta-llama/Llama-2-70b-hf \
---dataset_name "tau/scrolls" --dataset_config_name "gov_report" \
+--dataset_path "./dataset" \
+--model_path "/software/users/ihubara/lora_clean/llama-v2-fused-qkv" \
 --max_seq_len 8192 \
 --bf16 True \
---logging_steps 1 \
---eval_steps 22 \
---output_dir "/tmp/llama-70b" \
+--logging_steps 24 \
+--eval_steps 48 \
+--output_dir "./results/llama-70b_scrolls_gov_report_r16_$1" \
 --per_device_train_batch_size 1 \
 --gradient_accumulation_steps 1 \
---dataset_text_field "input" \
 --lr_scheduler_type "cosine" \
---learning_rate 1e-3 \
---warmup_ratio 0.03 \
+--learning_rate 4e-4 \
+--weight_decay 0.0001 \
+--warmup_ratio 0 \
+--max_grad_norm 0.3 \
 --use_gradient_checkpointing True \
+--target_eval_loss 0.925 \
 --use_peft_lora True \
 --lora_r 16 \
 --lora_alpha 32 \
 --lora_dropout 0.1 \
---max_steps 440 \
+--max_steps 1024 \
 --use_flash_attn \
---lora_target_modules "q_proj,v_proj,k_proj,o_proj"
+--seed 1234 \
+--lora_target_modules "qkv_proj,o_proj"
 ```
 where the Accelerate config file is [this one](https://github.com/regisss/lora/blob/main/configs/default_config.yaml).
 
diff --git a/llama2_70b_lora/configs/default_config.yaml b/llama2_70b_lora/configs/default_config.yaml
index e422c0364..8f542dc4c 100644
--- a/llama2_70b_lora/configs/default_config.yaml
+++ b/llama2_70b_lora/configs/default_config.yaml
@@ -1,6 +1,7 @@
 compute_environment: LOCAL_MACHINE
 debug: false
 deepspeed_config:
+  gradient_clipping: 0.3
   gradient_accumulation_steps: 1
   offload_optimizer_device: none
   offload_param_device: none
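
For reference, a minimal sketch of how the top of `configs/default_config.yaml` reads once this patch is applied. Only the keys visible in the hunk above are shown; the rest of the file is untouched by this change:

```yaml
# Sketch of configs/default_config.yaml after the patch (only keys shown in the hunk above).
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  gradient_clipping: 0.3        # newly added; mirrors --max_grad_norm 0.3 in the README command
  gradient_accumulation_steps: 1
  offload_optimizer_device: none
  offload_param_device: none
```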