fp32_to_fp16.py (forked from TheBlokeAI/AIScripts)
#
# A simple script for loading a float32 model and saving it as float16.
# Does not support bfloat16, but that could easily be added.
#
import argparse

import torch
from transformers import AutoModelForCausalLM

parser = argparse.ArgumentParser(description='Convert an fp32 model to fp16')
parser.add_argument('model_dir', type=str, help='fp32 model folder')
parser.add_argument('output_dir', type=str, help='fp16 output folder')
args = parser.parse_args()

# Load the full-precision weights. low_cpu_mem_usage avoids materialising
# an extra copy of the model in RAM while loading.
model = AutoModelForCausalLM.from_pretrained(
    args.model_dir,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Cast every parameter to float16, then write the converted checkpoint.
# The model is already fp16 at this point, so save_pretrained() needs no
# dtype argument (it does not accept torch_dtype in any case).
model = model.half()
model.save_pretrained(args.output_dir)
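
# Example invocation (the folder names here are illustrative, not from the
# original script):
#
#   python fp32_to_fp16.py /path/to/fp32-model /path/to/fp16-output
#
# A minimal sketch of how bfloat16 support could be added, as the header
# comment suggests. This is one possible design, not part of the original
# script: expose a --dtype flag and cast with .to() instead of the
# hard-coded .half():
#
#   parser.add_argument('--dtype', type=str, default='float16',
#                       choices=['float16', 'bfloat16'],
#                       help='dtype to save the model in')
#   ...
#   model = model.to(dtype=getattr(torch, args.dtype))
#   model.save_pretrained(args.output_dir)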