From a3ea93715208c3b22df8647dbeba99c1446b7a62 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Thu, 9 Jan 2025 12:57:50 -0500
Subject: [PATCH] Add `--try_auto_save_to_beaker` arg (#505)

* Add `--try_auto_save_to_beaker` arg

* push changes
---
 open_instruct/dpo_tune.py                 | 9 ++++++---
 open_instruct/dpo_tune_cache.py           | 9 ++++++---
 open_instruct/finetune.py                 | 9 ++++++---
 open_instruct/ppo_vllm_thread_ray.py      | 4 +++-
 open_instruct/ppo_vllm_thread_ray_gtrl.py | 4 +++-
 5 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/open_instruct/dpo_tune.py b/open_instruct/dpo_tune.py
index 92a783cfb..d5364661c 100644
--- a/open_instruct/dpo_tune.py
+++ b/open_instruct/dpo_tune.py
@@ -22,6 +22,7 @@
 import math
 import os
 import random
+import shutil
 import subprocess
 import time
 from copy import deepcopy
@@ -365,6 +366,8 @@ class FlatArguments:
         default=0.001,
         metadata={"help": "Weight for load balancing loss if applicable."},
     )
+    try_auto_save_to_beaker: bool = True
+    """Whether to try to save the model to Beaker dataset `/output` after training"""
     push_to_hub: bool = True
     """Whether to upload the saved model to huggingface"""
     hf_entity: Optional[str] = None
@@ -487,9 +490,6 @@ def main(args: FlatArguments):
 
     if is_beaker_job():
         beaker_config = maybe_get_beaker_config()
-        # try saving to the beaker `/output`, which will be uploaded to the beaker dataset
-        if len(beaker_config.beaker_dataset_id_urls) > 0:
-            args.output_dir = "/output"
 
     accelerator_log_kwargs = {}
 
@@ -1119,6 +1119,9 @@ def load_model():
         if accelerator.is_local_main_process:
             clean_last_n_checkpoints(args.output_dir, keep_last_n_checkpoints=0)
 
+    if args.try_auto_save_to_beaker and accelerator.is_main_process and len(beaker_config.beaker_dataset_id_urls) > 0 and args.output_dir != "/output":
+        shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
+
     if is_beaker_job() and accelerator.is_main_process:
         # dpo script only supports these two options right now for datasets
         if args.dataset_mixer:
diff --git a/open_instruct/dpo_tune_cache.py b/open_instruct/dpo_tune_cache.py
index 73b180e5f..78385cdfd 100644
--- a/open_instruct/dpo_tune_cache.py
+++ b/open_instruct/dpo_tune_cache.py
@@ -22,6 +22,7 @@
 import math
 import os
 import random
+import shutil
 import subprocess
 import time
 from dataclasses import dataclass, field
@@ -375,6 +376,8 @@ class FlatArguments:
     )
     concatenated_forward: bool = True
     """Whether to concatenate chosen and rejected for DPO training; True is good but you can set to False for saving memory."""
+    try_auto_save_to_beaker: bool = True
+    """Whether to try to save the model to Beaker dataset `/output` after training"""
     push_to_hub: bool = True
     """Whether to upload the saved model to huggingface"""
     hf_entity: Optional[str] = None
@@ -501,9 +504,6 @@ def main(args: FlatArguments):
 
     if is_beaker_job():
         beaker_config = maybe_get_beaker_config()
-        # try saving to the beaker `/output`, which will be uploaded to the beaker dataset
-        if len(beaker_config.beaker_dataset_id_urls) > 0:
-            args.output_dir = "/output"
 
     accelerator_log_kwargs = {}
 
@@ -1139,6 +1139,9 @@ def load_model():
         if accelerator.is_local_main_process:
             clean_last_n_checkpoints(args.output_dir, keep_last_n_checkpoints=0)
 
+    if args.try_auto_save_to_beaker and accelerator.is_main_process and len(beaker_config.beaker_dataset_id_urls) > 0 and args.output_dir != "/output":
+        shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
+
     if is_beaker_job() and accelerator.is_main_process:
         # dpo script only supports these two options right now for datasets
         if args.dataset_mixer:
diff --git a/open_instruct/finetune.py b/open_instruct/finetune.py
index 355d103b5..c6a20cbd9 100644
--- a/open_instruct/finetune.py
+++ b/open_instruct/finetune.py
@@ -19,6 +19,7 @@
 import math
 import os
 import random
+import shutil
 import subprocess
 import time
 from dataclasses import dataclass, field
@@ -337,6 +338,8 @@ class FlatArguments:
         default=0.5,
         metadata={"help": "Weight for load balancing loss if applicable."},
     )
+    try_auto_save_to_beaker: bool = True
+    """Whether to try to save the model to Beaker dataset `/output` after training"""
     push_to_hub: bool = True
     """Whether to upload the saved model to huggingface"""
     hf_entity: Optional[str] = None
@@ -471,9 +474,6 @@ def main(args: FlatArguments):
 
     if is_beaker_job():
         beaker_config = maybe_get_beaker_config()
-        # try saving to the beaker `/output`, which will be uploaded to the beaker dataset
-        if len(beaker_config.beaker_dataset_id_urls) > 0:
-            args.output_dir = "/output"
 
     accelerator_log_kwargs = {}
 
@@ -1033,6 +1033,9 @@ def main(args: FlatArguments):
         if accelerator.is_local_main_process:
             clean_last_n_checkpoints(args.output_dir, keep_last_n_checkpoints=0)
 
+    if args.try_auto_save_to_beaker and accelerator.is_main_process and len(beaker_config.beaker_dataset_id_urls) > 0 and args.output_dir != "/output":
+        shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
+
     if is_beaker_job() and accelerator.is_main_process:
         # dpo script only supports these two options right now for datasets
         if args.dataset_mixer:
diff --git a/open_instruct/ppo_vllm_thread_ray.py b/open_instruct/ppo_vllm_thread_ray.py
index e05e05beb..19e0ad192 100644
--- a/open_instruct/ppo_vllm_thread_ray.py
+++ b/open_instruct/ppo_vllm_thread_ray.py
@@ -274,6 +274,8 @@ class Args:
     """Whether to launch beaker evaluation jobs after training"""
     try_launch_beaker_eval_jobs_on_weka: bool = False
     """Whether to launch beaker evaluation jobs after training on weka"""
+    try_auto_save_to_beaker: bool = True
+    """Whether to try to save the model to Beaker dataset `/output` after training"""
     oe_eval_tasks: Optional[List[str]] = None
     """The beaker evaluation tasks to launch"""
     hf_metadata_dataset: Optional[str] = "allenai/tulu-3-evals"
@@ -1300,7 +1302,7 @@ def vllm_generate(
 
         # Ai2 logic: we use /output to store the artifacts of the job, so we
         # make a copy of the model to `/output` in the end.
-        if self.rank == 0 and len(self.beaker_config.beaker_dataset_id_urls) > 0:
+        if args.try_auto_save_to_beaker and self.rank == 0 and len(self.beaker_config.beaker_dataset_id_urls) > 0 and args.output_dir != "/output":
             shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
 
         print("finished training")
diff --git a/open_instruct/ppo_vllm_thread_ray_gtrl.py b/open_instruct/ppo_vllm_thread_ray_gtrl.py
index 085133203..0e6626eec 100644
--- a/open_instruct/ppo_vllm_thread_ray_gtrl.py
+++ b/open_instruct/ppo_vllm_thread_ray_gtrl.py
@@ -289,6 +289,8 @@ class Args:
     """Whether to launch beaker evaluation jobs after training"""
     try_launch_beaker_eval_jobs_on_weka: bool = False
     """Whether to launch beaker evaluation jobs after training on weka"""
+    try_auto_save_to_beaker: bool = True
+    """Whether to try to save the model to Beaker dataset `/output` after training"""
     oe_eval_tasks: Optional[List[str]] = None
     """The beaker evaluation tasks to launch"""
     hf_metadata_dataset: Optional[str] = "allenai/tulu-3-evals"
@@ -1375,7 +1377,7 @@ def vllm_generate(
 
         # Ai2 logic: we use /output to store the artifacts of the job, so we
         # make a copy of the model to `/output` in the end.
-        if self.rank == 0 and len(self.beaker_config.beaker_dataset_id_urls) > 0:
+        if args.try_auto_save_to_beaker and self.rank == 0 and len(self.beaker_config.beaker_dataset_id_urls) > 0 and args.output_dir != "/output":
             shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
 
         print("finished training")
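
The net effect of the patch: instead of silently redirecting `args.output_dir` to `/output` at startup whenever the job has a Beaker result dataset (the removed lines in the three tuning scripts), each script now trains into the user-chosen output directory and, at the very end, the main process copies it to `/output` so Beaker uploads it as a result dataset; the new `--try_auto_save_to_beaker` flag (default True) turns this behavior off. Read as a standalone helper, the repeated condition looks like the sketch below. This is an illustrative sketch, not code from the repository: the helper name, the `BeakerConfig` stand-in, and the `enabled` and `is_main_process` parameters are invented here for clarity, while the `/output` convention, the `beaker_dataset_id_urls` check, and `shutil.copytree(..., dirs_exist_ok=True)` come straight from the diff.

    # Sketch only: restates the save-to-Beaker logic the patch threads
    # through each training script, under the assumptions named above.
    import shutil
    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class BeakerConfig:
        # URLs of the Beaker result datasets attached to the job, if any.
        beaker_dataset_id_urls: List[str] = field(default_factory=list)

    def try_auto_save_to_beaker(
        output_dir: str,
        beaker_config: BeakerConfig,
        is_main_process: bool,
        enabled: bool = True,
    ) -> bool:
        """Copy `output_dir` to `/output` so Beaker uploads it as a result dataset.

        Mirrors the condition the patch adds: only the main process copies,
        only when the job actually has a result dataset to upload to, and only
        when training did not already write to `/output` (copying a tree onto
        itself would raise). Returns True if a copy was made.
        """
        if (
            enabled
            and is_main_process
            and len(beaker_config.beaker_dataset_id_urls) > 0
            and output_dir != "/output"
        ):
            shutil.copytree(output_dir, "/output", dirs_exist_ok=True)
            return True
        return False

Note the `args.output_dir != "/output"` guard: under the old behavior the scripts themselves set `output_dir` to `/output`, so without the guard the final copy could degenerate into copying a directory onto itself. If the scripts parse booleans in the usual HfArgumentParser style (an assumption, not shown in the diff), the feature can be disabled with `--try_auto_save_to_beaker False`.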