store configs, minor improvements to pref data mixer (#457)
* store configs

* fix

* clean up

* deletions

* rename
natolambert authored Dec 3, 2024
1 parent 22116c2 commit 1017c7c
Showing 8 changed files with 119 additions and 2 deletions.
@@ -66,6 +66,9 @@ tasks:
       - mountPath: /weka/oe-adapt-default
         source:
           weka: oe-adapt-default
+      - mountPath: /weka/oe-training-default
+        source:
+          weka: oe-training-default
     result:
       path: /output
     resources:
29 changes: 29 additions & 0 deletions configs/train_configs/olmo2/olmo2_1124_13b_dpo.yaml
@@ -0,0 +1,29 @@
model_name_or_path: allenai/OLMo-2-1124-13B-SFT
use_flash_attn: true
gradient_checkpointing: true
dataset_mixer:
  allenai/olmo-2-1124-13b-preference-mix: 1.0
tokenizer_name: allenai/OLMo-2-1124-13B-SFT
use_slow_tokenizer: true
max_seq_length: 2048
preprocessing_num_workers: 16
per_device_train_batch_size: 1
# gradient_accumulation_steps: 16 # designed for 8 GPUs, so batch size 128
gradient_accumulation_steps: 4 # designed for 32 GPUs, so batch size 128
learning_rate: 5.0e-7
lr_scheduler_type: linear
warmup_ratio: 0.1
weight_decay: 0.0
num_train_epochs: 1
output_dir: output/dpo_7b
with_tracking: true
report_to:
- wandb
logging_steps: 1
use_lora: false
dpo_loss_type: dpo_norm
dpo_beta: 5
checkpointing_steps: 1000
hf_metadata_dataset: allenai/olmo-instruct-evals
add_bos: true
seed: 1234
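The commented-out and active gradient_accumulation_steps lines in the DPO configs both target the same effective batch size of 128. A quick sanity check of that arithmetic (illustrative only, assuming one sample per device across the 32 GPUs named in the comment):

```python
# Illustrative arithmetic only (not part of the commit).
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
num_gpus = 32  # assumed from the "designed for 32 GPUs" comment, e.g. 4 nodes x 8 GPUs

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
assert effective_batch_size == 128
```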
26 changes: 26 additions & 0 deletions configs/train_configs/olmo2/olmo2_1124_13b_sft.yaml
@@ -0,0 +1,26 @@
model_name_or_path: allenai/OLMo-2-1124-13B
use_flash_attn: true
tokenizer_name: allenai/OLMo-2-1124-13B
use_slow_tokenizer: true
dataset_mixer:
  allenai/tulu-3-sft-olmo-2-mixture: 1.0

max_seq_length: 4096
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 4 # effective batch size 128 with 4 nodes
learning_rate: 6e-06
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
- wandb
logging_steps: 1
reduce_loss: sum
checkpointing_steps: epoch
dataset_mix_dir: /output/
hf_metadata_dataset: allenai/olmo-instruct-evals
add_bos: true
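These config files are plain YAML, so they can be sanity-checked before a job is submitted. A minimal sketch (the loading snippet below is illustrative and not part of the repository's tooling; only the path and keys come from this diff):

```python
import yaml  # requires PyYAML

with open("configs/train_configs/olmo2/olmo2_1124_13b_sft.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model_name_or_path"])            # allenai/OLMo-2-1124-13B
print(cfg["dataset_mixer"])                 # {'allenai/tulu-3-sft-olmo-2-mixture': 1.0}
print(cfg["per_device_train_batch_size"])   # 1
```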
28 changes: 28 additions & 0 deletions configs/train_configs/olmo2/olmo2_1124_7b_dpo.yaml
@@ -0,0 +1,28 @@
model_name_or_path: allenai/OLMo-2-1124-7B-SFT
use_flash_attn: true
gradient_checkpointing: true
dataset_mixer:
  allenai/olmo-2-1124-13b-preference-mix: 1.0
tokenizer_name: allenai/OLMo-2-1124-7B-SFT
use_slow_tokenizer: true
max_seq_length: 2048
preprocessing_num_workers: 16
per_device_train_batch_size: 1
# gradient_accumulation_steps: 16 # designed for 8 GPUs, so batch size 128
gradient_accumulation_steps: 4 # designed for 32 GPUs, so batch size 128
learning_rate: 5.0e-7
lr_scheduler_type: linear
warmup_ratio: 0.1
weight_decay: 0.0
num_train_epochs: 1
output_dir: output/dpo_7b
with_tracking: true
report_to:
- wandb
logging_steps: 1
use_lora: false
dpo_loss_type: dpo_norm
dpo_beta: 5
checkpointing_steps: 1000
hf_metadata_dataset: allenai/olmo-instruct-evals
add_bos: true
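Both DPO configs select dpo_loss_type: dpo_norm with dpo_beta: 5, i.e. a length-normalized variant of the DPO objective. A rough sketch of what length normalization typically means here (an assumed form for illustration, not the repository's actual loss code; the reference-policy terms of full DPO are omitted):

```python
import torch
import torch.nn.functional as F

def dpo_norm_loss_sketch(chosen_logps, rejected_logps, chosen_lens, rejected_lens, beta=5.0):
    # Sequence log-probabilities are divided by token counts before the usual
    # DPO logistic loss; a complete implementation would also subtract the same
    # quantities computed under a frozen reference policy.
    chosen = chosen_logps / chosen_lens
    rejected = rejected_logps / rejected_lens
    return -F.logsigmoid(beta * (chosen - rejected)).mean()
```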
26 changes: 26 additions & 0 deletions configs/train_configs/olmo2/olmo2_1124_7b_sft.yaml
@@ -0,0 +1,26 @@
model_name_or_path: allenai/OLMo-2-1124-7B
use_flash_attn: true
tokenizer_name: allenai/OLMo-2-1124-7B
use_slow_tokenizer: true
dataset_mixer:
  allenai/tulu-3-sft-olmo-2-mixture: 1.0

max_seq_length: 4096
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 4 # effective batch size 128 with 4 nodes
learning_rate: 1.0e-05
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
- wandb
logging_steps: 1
reduce_loss: sum
checkpointing_steps: epoch
dataset_mix_dir: /output/
hf_metadata_dataset: allenai/olmo-instruct-evals
add_bos: true
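Both SFT configs set reduce_loss: sum rather than the usual mean. The practical difference (a generic illustration, not the repository's training loop) is that summing token losses weights long sequences more heavily than averaging does:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10, 32)             # (batch, seq_len, vocab) toy values
labels = torch.randint(0, 32, (4, 10))

loss_mean = F.cross_entropy(logits.view(-1, 32), labels.view(-1), reduction="mean")
loss_sum = F.cross_entropy(logits.view(-1, 32), labels.view(-1), reduction="sum")
# loss_sum equals loss_mean times the number of tokens, so with "sum" longer
# examples contribute proportionally more to each update.
```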
2 changes: 1 addition & 1 deletion open_instruct/mix_data_preferences.py
@@ -37,7 +37,7 @@ def main():
         configs=args.dataset_config_name,
         splits=["train"],
         save_data_dir=args.dataset_mix_dir,  # location where dataset is saved as json
-        columns_to_keep=["chosen", "rejected"],
+        columns_to_keep=["chosen", "rejected", "chosen_model", "rejected_model"],
         keep_ids=True,
         add_source_col=True,
     )
5 changes: 5 additions & 0 deletions open_instruct/utils.py
@@ -320,6 +320,11 @@ def get_datasets(
         source_col = [ds] * len(dataset)
         dataset = dataset.add_column("source", source_col)
 
+        # for each column in columns_to_keep that this dataset lacks, add it filled with None
+        for col in columns_to_keep:
+            if col not in dataset.column_names:
+                dataset = dataset.add_column(col, [None] * len(dataset))
+
         # add a tag to the dataset recording where it was sourced from
         if "train" in split:
             raw_train_datasets.append(dataset)
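The new loop in get_datasets back-fills any requested column that a particular source dataset lacks. This pairs with the mix_data_preferences.py change above: once chosen_model and rejected_model are requested, preference datasets that never recorded those fields would otherwise break when sources are combined. A standalone illustration with toy data (not from the repository):

```python
from datasets import Dataset

columns_to_keep = ["chosen", "rejected", "chosen_model", "rejected_model"]
ds = Dataset.from_dict({"chosen": ["a"], "rejected": ["b"]})  # toy preference data

for col in columns_to_keep:
    if col not in ds.column_names:
        ds = ds.add_column(col, [None] * len(ds))

print(ds.column_names)  # ['chosen', 'rejected', 'chosen_model', 'rejected_model']
```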
2 changes: 1 addition & 1 deletion scripts/submit_finetune_job.py
@@ -169,7 +169,7 @@ def parse_args(args):
     # name and description
     exp_name = f"open_instruct_finetune_{model_name}_{now}"[:128]
     d['description'] = exp_name
-    d['tasks'][0]['name'] = exp_name
+    d['tasks'][0]['name'] = exp_name[:128]
 
     # add cluster-specific env vars
     if args.num_nodes > 1:
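The [:128] added to the task name mirrors the truncation already applied when exp_name is built, presumably because Beaker caps name lengths at 128 characters (the exact limit is an assumption here, not stated in the diff). A trivial illustration:

```python
model_name = "x" * 200                      # an unusually long model path
now = "2024-12-03-00-00-00"
exp_name = f"open_instruct_finetune_{model_name}_{now}"[:128]
task_name = exp_name[:128]                  # defensive: already at most 128 characters
assert len(exp_name) == len(task_name) == 128
```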
