Skip to content

Commit cdf10fe

Browse files
authored
Merge pull request #43 from MengYue-MK2000/main
更新Windows下载Datasets的方法
2 parents 2601c45 + b1ac936 commit cdf10fe

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

docs/chapter5/code/download_dataset.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export HF_ENDPOINT=https://hf-mirror.com
66
# dataset dir 下载到本地目录
77
dataset_dir="your local dataset dir"
88

9-
# 下载预训练数据集
9+
# 下载预训练数据集(需要预先安装 modelscope,可使用 pip3 install modelscope 安装)
1010
# Download the pre-training corpus archive into ${dataset_dir}.
# The directory is quoted so a path containing spaces is passed as one argument.
modelscope download --dataset ddzhu123/seq-monkey mobvoi_seq_monkey_general_open_corpus.jsonl.tar.bz2 --local_dir "${dataset_dir}"
1111

1212
# 解压预训练数据集
@@ -17,4 +17,4 @@ huggingface-cli download \
1717
--repo-type dataset \
1818
--resume-download \
1919
BelleGroup/train_3.5M_CN \
20-
--local-dir "${dataset_dir}/BelleGroup"
20+
--local-dir "${dataset_dir}/BelleGroup"
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Windows下载方式
2+
# 使用PowerShell下载
3+
# 仅为当前 PowerShell 会话临时设置环境变量,关闭 PowerShell 窗口后自动失效
4+
$env:HF_ENDPOINT = "https://hf-mirror.com"
5+
6+
# 将 \path\to\your\dataset 替换为数据集要下载到的本地目录路径
7+
$dataset_dir = "\path\to\your\dataset"
8+
9+
# 需要预先安装modelscope,使用pip install modelscope安装
10+
modelscope download --dataset ddzhu123/seq-monkey mobvoi_seq_monkey_general_open_corpus.jsonl.tar.bz2 --local_dir "$dataset_dir"
11+
12+
tar -xvf "$dataset_dir\mobvoi_seq_monkey_general_open_corpus.jsonl.tar.bz2" -C "$dataset_dir"
13+
14+
huggingface-cli download `
15+
--repo-type dataset `
16+
--resume-download `
17+
BelleGroup/train_3.5M_CN `
18+
--local-dir "$dataset_dir\BelleGroup"
19+
20+
# 使用CMD下载
21+
# 仅为当前 CMD 会话临时设置环境变量,关闭 CMD 窗口后自动失效
22+
set HF_ENDPOINT=https://hf-mirror.com
23+
24+
# 将 \path\to\your\dataset 替换为数据集要下载到的本地目录路径
25+
set dataset_dir=\path\to\your\dataset
26+
27+
REM Download the pre-training corpus archive into %dataset_dir%.
REM The directory is quoted so a path containing spaces is passed as one argument.
modelscope download --dataset ddzhu123/seq-monkey mobvoi_seq_monkey_general_open_corpus.jsonl.tar.bz2 --local_dir "%dataset_dir%"
28+
29+
tar -xvf "%dataset_dir%\mobvoi_seq_monkey_general_open_corpus.jsonl.tar.bz2" -C "%dataset_dir%"
30+
31+
huggingface-cli download ^
32+
--repo-type dataset ^
33+
--resume-download ^
34+
BelleGroup/train_3.5M_CN ^
35+
--local-dir "%dataset_dir%\BelleGroup"

0 commit comments

Comments
 (0)