Skip to content

Commit

Permalink
[usability] accelerate support initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
wheresmyhair committed Feb 22, 2025
1 parent 65b9125 commit beba6ef
Show file tree
Hide file tree
Showing 33 changed files with 158 additions and 148 deletions.
1 change: 0 additions & 1 deletion configs/iterative_dpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ preprocessing_num_workers: 16
output_dir: ./output_models/iterative_dpo
run_name: iterative_dpo
random_seed: 42
use_accelerator: True
enable_distributed_inference: True
distributed_inference_num_instances: 8
initial_iter_idx: 0 # 0 refers to the first dataset in dataset_path_list
Expand Down
2 changes: 1 addition & 1 deletion examples/benchmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def main():
dataset_name = benchmarking_args.dataset_name
# metric = pipeline_args.metric
if is_lmflow_local_benchmarking(dataset_name): # TODO (@Jipeng)
model = AutoModel.get_model(model_args, tune_strategy='none', ds_config=ds_config)
model = AutoModel.get_model(model_args, do_train=False, ds_config=ds_config)
run_lmflow_local_benchmarking(dataset_name,pipeline_name,model_args,pipeline_args,model) # Pass args TODO (@Jipeng)
elif is_lm_evaluation_benchmarking(dataset_name):
model = model_args.model_name_or_path
Expand Down
3 changes: 1 addition & 2 deletions examples/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,9 @@ def main():

model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
device=pipeline_args.device,
use_accelerator=True,
)

# We don't need input data, we will read interactively from stdin
Expand Down
2 changes: 1 addition & 1 deletion examples/chatbot_gradio.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class ChatbotArguments:

model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
device=pipeline_args.device,
torch_dtype=torch.float16
Expand Down
3 changes: 1 addition & 2 deletions examples/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@

model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
use_accelerator=pipeline_args.use_accelerator_for_evaluator
)
dataset = Dataset(data_args)

Expand Down
2 changes: 1 addition & 1 deletion examples/finetune_multi_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def main():
# do not register deepspeed in the model.
# with_deepspeed flag may be removed
# by modifying the tune strategy in the future.
model = AutoModel.get_model(model_args, tune_strategy='none',
model = AutoModel.get_model(model_args, do_train=True,
ds_config=pipeline_args.deepspeed,
custom_model=True,
with_deepspeed=False,
Expand Down
3 changes: 1 addition & 2 deletions examples/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@ def main():

model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
device=pipeline_args.device,
use_accelerator=True,
)

# We don't need input data, we will read interactively from stdin
Expand Down
2 changes: 1 addition & 1 deletion examples/merge_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def main():
model_args.use_lora = True
model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
device=merge_lora_args.device,
ds_config=merge_lora_args.ds_config
)
Expand Down
2 changes: 1 addition & 1 deletion examples/rm_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def main():
model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

dataset = Dataset(data_args)
model = AutoModel.get_model(model_args, tune_strategy='none', use_accelerator=pipeline_args.use_accelerator)
model = AutoModel.get_model(model_args, do_train=False)
inferencer = AutoPipeline.get_pipeline(
pipeline_name=pipeline_name,
model_args=model_args,
Expand Down
2 changes: 1 addition & 1 deletion examples/vis_chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def main():
ds_config = json.load(f)
model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
device=pipeline_args.device,
custom_model=model_args.custom_model,
Expand Down
2 changes: 1 addition & 1 deletion examples/vis_chatbot_gradio.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def start_inferencer(

model = AutoModel.get_model(
model_args,
tune_strategy='none',
do_train=False,
ds_config=ds_config,
device=pipeline_args.device,
custom_model=model_args.custom_model,
Expand Down
2 changes: 1 addition & 1 deletion examples/vllm_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def main():
model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

dataset = Dataset(data_args)
model = AutoModel.get_model(model_args, tune_strategy='none')
model = AutoModel.get_model(model_args, do_train=False)
inferencer = AutoPipeline.get_pipeline(
pipeline_name=pipeline_name,
model_args=model_args,
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ tokenizers>=0.13.3
peft>=0.10.0
torch>=2.0.1
wandb
deepspeed>=0.14.4
sentencepiece
transformers>=4.31.0
cpm_kernels==1.0.11
Expand Down
1 change: 0 additions & 1 deletion scripts/run_chatbot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ accelerate launch --config_file configs/accelerator_multigpu_config.yaml \
examples/chatbot.py \
--deepspeed configs/ds_config_chatbot.json \
--model_name_or_path ${model} \
--use_accelerator True \
--max_new_tokens 256 \
--temperature 1.0 \
--end_string "#" \
Expand Down
1 change: 0 additions & 1 deletion scripts/run_evaluation_accelerator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ CUDA_VISIBLE_DEVICES=0 accelerate launch --config_file configs/accelerator_singl
--metric accuracy \
--output_dir output_dir/accelerator_1_card \
--inference_batch_size_per_device 1 \
--use_accelerator_for_evaluator True \
--torch_dtype bfloat16
1 change: 0 additions & 1 deletion scripts/run_inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ accelerate launch --config_file configs/accelerator_multigpu_config.yaml \
examples/inference.py \
--deepspeed configs/ds_config_chatbot.json \
--model_name_or_path ${model} \
--use_accelerator True \
--max_new_tokens 256 \
--temperature 1.0 \
${lora_args}
1 change: 0 additions & 1 deletion scripts/run_rm_inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ accelerate launch --config_file configs/accelerator_multigpu_config.yaml \
--trust_remote_code ${trust_remote_code} \
--model_name_or_path ${model_name_or_path} \
--arch_type text_regression \
--use_accelerator True \
--block_size 4096 \
--inference_batch_size 16 \
--dataset_path ${dataset_path} \
Expand Down
2 changes: 1 addition & 1 deletion service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class AppArguments:
local_rank = int(os.getenv("LOCAL_RANK", "0"))
world_size = int(os.getenv("WORLD_SIZE", "1"))
torch.cuda.set_device(local_rank)
model = AutoModel.get_model(model_args, tune_strategy='none', ds_config=ds_config, use_accelerator=True)
model = AutoModel.get_model(model_args, do_train=False, ds_config=ds_config)
accelerator = Accelerator()

def stream_generate(inputs,context_len = 1024, max_new_tokens=128, end_string="##"):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"gradio": ["gradio"],
"flask": ["flask", "flask_cors"],
"flash_attn": ["flash-attn>=2.0.2"],
"trl": ["trl==0.8.0"]
"trl": ["trl==0.8.0"],
"deepspeed": ["deepspeed>=0.14.4"],
}

readme_path = os.path.join(folder, "README.md")
Expand Down
23 changes: 19 additions & 4 deletions src/lmflow/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,8 +924,8 @@ class EvaluatorArguments:
),
},
)
use_accelerator_for_evaluator: bool = field(
default=False, metadata={"help": "Whether to use Huggingface Accelerator instead of Deepspeed"},
use_accelerator_for_evaluator: Optional[bool] = field(
default=None, metadata={"help": "[Deprecated] Whether to use Huggingface Accelerator instead of Deepspeed"},
)

temperature: float = field(
Expand All @@ -942,6 +942,14 @@ class EvaluatorArguments:
default=100,
metadata={"help": "Maximum length during inference."},
)

def __post_init__(self):
if self.use_accelerator_for_evaluator is not None:
logger.warning(
"You've specified `use_accelerator_for_evaluator`. This argument is deprecated. "
"It will not take effect and will be removed in a future version, "
"since LMFlow now can automatically detect whether is in Accelerate or Deepspeed environment."
)


@dataclass
Expand Down Expand Up @@ -1061,8 +1069,8 @@ class InferencerArguments:
"help": "whether turn on true random sampling during inference."
},
)
use_accelerator: bool = field(
default=False, metadata={"help": "Whether to use Huggingface Accelerator instead of Deepspeed"},
use_accelerator: Optional[bool] = field(
default=None, metadata={"help": "[Deprecated] Whether to use Huggingface Accelerator instead of Deepspeed"},
)
use_beam_search: Optional[bool] = field(
default=False,
Expand Down Expand Up @@ -1131,6 +1139,13 @@ class InferencerArguments:
)

def __post_init__(self):
if self.use_accelerator is not None:
logger.warning(
"You've specified `use_accelerator`. This argument is deprecated. "
"It will not take effect and will be removed in a future version, "
"since LMFlow now can automatically detect whether is in Accelerate or Deepspeed environment."
)

if self.save_results:
if self.results_path is None:
raise ValueError("Need to specify results_path when save_results is True.")
Expand Down
31 changes: 8 additions & 23 deletions src/lmflow/models/hf_decoder_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,12 @@
"""This is a class called HFDecoderModel which is a wrapper around transformers model and
tokenizer classes. It has several methods such as __init__, tokenize, and train that are
used for training and fine-tuning the model. The __init__ method takes in several arguments
such as model_args, tune_strategy, and ds_config, which are used to load the pretrained
such as model_args which are used to load the pretrained
model and tokenizer, and initialize the training settings.
The tokenize method is used to tokenize the input text and return the input IDs and attention
masks that can be fed to the model for training or inference.
This class supports different tune_strategy options such as 'normal', 'none', 'lora', and
'adapter', which allow for different fine-tuning settings of the model. However, the 'lora'
and 'adapter' strategies are not yet implemented.
Overall, this class provides a convenient interface for loading and fine-tuning transformer
models and can be used for various NLP tasks such as language modeling, text classification,
and question answering.
Expand Down Expand Up @@ -46,6 +42,7 @@
conversation_tokenize_function
)
from lmflow.utils.versioning import is_ray_available, is_vllm_available, is_flash_attn_available
from lmflow.utils.envs import is_accelerate_env


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,11 +71,9 @@ class HFDecoderModel(DecoderModel, HFModelMixin, Tunable):
model_args :
Model arguments such as model name, path, revision, etc.
tune_strategy : str or none, default="normal".
A string representing the dataset backend. Defaults to "huggingface".
ds_config :
Deepspeed configurations.
do_train : bool, default True
Determines whether to prepare the model for training, including distributed env, model placement, quantization,
lora, etc.
args : Optional.
Positional arguments.
Expand All @@ -90,26 +85,16 @@ class HFDecoderModel(DecoderModel, HFModelMixin, Tunable):
def __init__(
self,
model_args,
tune_strategy='normal',
ds_config=None,
do_train=True,
device="gpu",
use_accelerator=False,
*args,
**kwargs
):
"""
Initializes a HFDecoderModel instance.
:param model_args: dictionary with model arguments such as model name, path, revision, etc.
:param tune_strategy: tuning strategy: normal, none, lora or adapter
:param ds_config: deepspeed configuration for distributed training
"""
HFModelMixin.__init__(
self,
model_args=model_args,
do_train=True if tune_strategy == "normal" else False,
ds_config=ds_config,
do_train=do_train,
device=device,
use_accelerator=use_accelerator,
*args,
**kwargs
)
Expand Down Expand Up @@ -384,7 +369,7 @@ def __inference(self, inputs, *args, **kwargs):
The generated sequence output
"""
with torch.no_grad():
if self.use_accelerator:
if is_accelerate_env():
outputs = self.backend_model.generate(
input_ids=inputs,
pad_token_id=self.tokenizer.pad_token_id,
Expand Down
Loading

0 comments on commit beba6ef

Please sign in to comment.