11# Copyright (c) Microsoft Corporation.
22# Licensed under the MIT License.
33
4+ import os
45import re
56import sys
67import qlib
1112from tqdm import tqdm
1213from pathlib import Path
1314from loguru import logger
15+ from cryptography .fernet import Fernet
1416from qlib .utils import exists_qlib_data
1517
1618
1719class GetData :
18- DATASET_VERSION = "v2"
1920 REMOTE_URL = "https://qlibpublic.blob.core.windows.net/data/default/stock_data"
20- QLIB_DATA_NAME = "{dataset_name}_{region}_{interval}_{qlib_version}.zip"
21+ # "?" is not included in the token.
22+ TOKEN = "gAAAAABkmDhojHc0VSCDdNK1MqmRzNLeDFXe5hy8obHpa6SDQh4de6nW5gtzuD-fa6O_WZb0yyqYOL7ndOfJX_751W3xN5YB4-n-P22jK-t6ucoZqhT70KPD0Lf0_P328QPJVZ1gDnjIdjhi2YLOcP4BFTHLNYO0mvzszR8TKm9iT5AKRvuysWnpi8bbYwGU9zAcJK3x9EPL43hOGtxliFHcPNGMBoJW4g_ercdhi0-Qgv5_JLsV-29_MV-_AhuaYvJuN2dEywBy"
23+ KEY = "EYcA8cgorA8X9OhyMwVfuFxn_1W3jGk6jCbs3L2oPoA="
2124
2225 def __init__ (self , delete_zip_file = False ):
2326 """
@@ -29,24 +32,44 @@ def __init__(self, delete_zip_file=False):
2932 """
3033 self .delete_zip_file = delete_zip_file
3134
32- def normalize_dataset_version (self , dataset_version : str = None ):
33- if dataset_version is None :
34- dataset_version = self .DATASET_VERSION
35- return dataset_version
def merge_remote_url(self, file_name: str):
    """
    Build the download URL for ``file_name``, including the access token.

    The token is stored encrypted in ``self.TOKEN`` and is decrypted with
    ``self.KEY`` on every call; the decrypted text is appended as the query
    string (the "?" separator is added here, it is not part of the token).

    Parameters
    ----------
    file_name: str
        file path relative to ``REMOTE_URL``; may contain folder names,
        for example: v2/qlib_data_simple_cn_1d_latest.zip

    Returns
    -------
    str
        ``{REMOTE_URL}/{file_name}?{token}``
    """
    fernet = Fernet(self.KEY)
    # Pass the token as bytes: bytes are accepted by every cryptography
    # release, whereas str tokens may be rejected by older ones.
    encrypted_token = self.TOKEN.encode() if isinstance(self.TOKEN, str) else self.TOKEN
    token = fernet.decrypt(encrypted_token).decode()
    return f"{self.REMOTE_URL}/{file_name}?{token}"
3639
37- def merge_remote_url (self , file_name : str , dataset_version : str = None ):
38- return f"{ self .REMOTE_URL } /{ self .normalize_dataset_version (dataset_version )} /{ file_name } "
40+ def download_data (self , file_name : str , target_dir : [Path , str ], delete_old : bool = True ):
41+ """
42+ Download the specified file to the target folder.
3943
40- def _download_data (
41- self , file_name : str , target_dir : [Path , str ], delete_old : bool = True , dataset_version : str = None
42- ):
44+ Parameters
45+ ----------
46+ target_dir: str
47+ data save directory
48+ file_name: str
49+ dataset name, must end with .zip, value from [rl_data.zip, csv_data_cn.zip, ...]
50+ may contain folder names, for example: v2/qlib_data_simple_cn_1d_latest.zip
51+ delete_old: bool
52+ delete an existing directory, by default True
53+
54+ Examples
55+ ---------
56+ # get rl data
57+ python get_data.py download_data --file_name rl_data.zip --target_dir ~/.qlib/qlib_data/rl_data
58+ When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/rl_data.zip?{token}
59+
60+ # get cn csv data
61+ python get_data.py download_data --file_name csv_data_cn.zip --target_dir ~/.qlib/csv_data/cn_data
62+ When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/csv_data_cn.zip?{token}
63+ -------
64+
65+ """
4366 target_dir = Path (target_dir ).expanduser ()
4467 target_dir .mkdir (exist_ok = True , parents = True )
4568 # saved file name
46- _target_file_name = datetime .datetime .now ().strftime ("%Y%m%d%H%M%S" ) + "_" + file_name
69+ _target_file_name = datetime .datetime .now ().strftime ("%Y%m%d%H%M%S" ) + "_" + os . path . basename ( file_name )
4770 target_path = target_dir .joinpath (_target_file_name )
4871
49- url = self .merge_remote_url (file_name , dataset_version )
72+ url = self .merge_remote_url (file_name )
5073 resp = requests .get (url , stream = True , timeout = 60 )
5174 resp .raise_for_status ()
5275 if resp .status_code != 200 :
@@ -56,7 +79,7 @@ def _download_data(
5679 logger .warning (
5780 f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
5881 )
59- logger .info (f"{ file_name } downloading......" )
82+ logger .info (f"{ os . path . basename ( file_name ) } downloading......" )
6083 with tqdm (total = int (resp .headers .get ("Content-Length" , 0 ))) as p_bar :
6184 with target_path .open ("wb" ) as fp :
6285 for chunk in resp .iter_content (chunk_size = chunk_size ):
@@ -67,8 +90,8 @@ def _download_data(
6790 if self .delete_zip_file :
6891 target_path .unlink ()
6992
70- def check_dataset (self , file_name : str , dataset_version : str = None ):
71- url = self .merge_remote_url (file_name , dataset_version )
93+ def check_dataset (self , file_name : str ):
94+ url = self .merge_remote_url (file_name )
7295 resp = requests .get (url , stream = True , timeout = 60 )
7396 status = True
7497 if resp .status_code == 404 :
@@ -140,9 +163,11 @@ def qlib_data(
140163 ---------
141164 # get 1d data
142165 python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
166+ When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/v2/qlib_data_cn_1d_latest.zip?{token}
143167
144168 # get 1min data
145169 python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --interval 1min --region cn
170+ When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/v2/qlib_data_cn_1min_latest.zip?{token}
146171 -------
147172
148173 """
@@ -155,29 +180,12 @@ def qlib_data(
155180
156181 qlib_version = "." .join (re .findall (r"(\d+)\.+" , qlib .__version__ ))
157182
158- def _get_file_name (v ):
159- return self .QLIB_DATA_NAME .format (
160- dataset_name = name , region = region .lower (), interval = interval .lower (), qlib_version = v
161- )
162-
163- file_name = _get_file_name (qlib_version )
164- if not self .check_dataset (file_name , version ):
165- file_name = _get_file_name ("latest" )
166- self ._download_data (file_name .lower (), target_dir , delete_old , dataset_version = version )
167-
168- def csv_data_cn (self , target_dir = "~/.qlib/csv_data/cn_data" ):
169- """download cn csv data from remote
170-
171- Parameters
172- ----------
173- target_dir: str
174- data save directory
175-
176- Examples
177- ---------
178- python get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data
179- -------
def _get_file_name_with_version(qlib_version, dataset_version):
    """
    Return the remote file path
    ``<dataset_version>/<name>_<region>_<interval>_<qlib_version>.zip``.

    ``name``, ``region`` and ``interval`` are read from the enclosing scope;
    ``region`` and ``interval`` are lower-cased. ``dataset_version`` falls
    back to "v2" when it is None.
    """
    if dataset_version is None:
        dataset_version = "v2"
    return f"{dataset_version}/{name}_{region.lower()}_{interval.lower()}_{qlib_version}.zip"
180187
181- """
182- file_name = "csv_data_cn.zip"
183- self ._download_data (file_name , target_dir )
188+ file_name = _get_file_name_with_version (qlib_version , dataset_version = version )
189+ if not self .check_dataset (file_name ):
190+ file_name = _get_file_name_with_version ("latest" , dataset_version = version )
191+ self .download_data (file_name .lower (), target_dir , delete_old )
0 commit comments