1212from tqdm import tqdm
1313from pathlib import Path
1414from loguru import logger
15- from cryptography .fernet import Fernet
1615from qlib .utils import exists_qlib_data
1716
1817
1918class GetData :
20- REMOTE_URL = "https://qlibpublic.blob.core.windows.net/data/default/stock_data"
21- # "?" is not included in the token.
22- TOKEN = b"gAAAAABkmDhojHc0VSCDdNK1MqmRzNLeDFXe5hy8obHpa6SDQh4de6nW5gtzuD-fa6O_WZb0yyqYOL7ndOfJX_751W3xN5YB4-n-P22jK-t6ucoZqhT70KPD0Lf0_P328QPJVZ1gDnjIdjhi2YLOcP4BFTHLNYO0mvzszR8TKm9iT5AKRvuysWnpi8bbYwGU9zAcJK3x9EPL43hOGtxliFHcPNGMBoJW4g_ercdhi0-Qgv5_JLsV-29_MV-_AhuaYvJuN2dEywBy"
23- KEY = "EYcA8cgorA8X9OhyMwVfuFxn_1W3jGk6jCbs3L2oPoA="
19+ REMOTE_URL = "https://github.com/SunsetWolf/qlib_dataset/releases/download"
2420
2521 def __init__ (self , delete_zip_file = False ):
2622 """
@@ -33,9 +29,45 @@ def __init__(self, delete_zip_file=False):
3329 self .delete_zip_file = delete_zip_file
3430
def merge_remote_url(self, file_name: str):
    """
    Generate download links.

    Parameters
    ----------
    file_name: str
        The name of the file to be downloaded.
        The file name can be accompanied by a version number, (e.g.: v2/qlib_data_simple_cn_1d_latest.zip),
        if no version number is attached, it will be downloaded from v0 by default.

    Returns
    -------
    str
        ``REMOTE_URL`` joined with the (optionally versioned) file name.
    """
    # A leading "/" would otherwise count as "has a version directory" and
    # produce a double slash in the URL; drop it before deciding.
    relative_name = file_name.lstrip("/")
    if "/" not in relative_name:
        # No version directory supplied: fall back to the default "v0" release.
        relative_name = f"v0/{relative_name}"
    return f"{self.REMOTE_URL}/{relative_name}"
43+
def download(self, url: str, target_path: [Path, str]):
    """
    Download a file from the specified url.

    Parameters
    ----------
    url: str
        The url of the data.
    target_path: [Path, str]
        The location where the data is saved, including the file name.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status, or with any status other than 200.
    """
    # Normalise once: the signature accepts ``str`` as well as ``Path``,
    # and ``.open`` / ``.name`` below only exist on ``Path``.
    target_path = Path(target_path)
    chunk_size = 1024

    # Using the response as a context manager releases the streamed
    # connection even if the status check or the file write fails.
    with requests.get(url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        if resp.status_code != 200:
            # raise_for_status() only rejects 4xx/5xx; any other non-200
            # answer is also treated as a failed download.
            raise requests.exceptions.HTTPError(
                f"Unexpected status code {resp.status_code} for url: {url}", response=resp
            )

        logger.warning(
            "The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
        )
        logger.info(f"{target_path.name} downloading......")
        # Content-Length may be absent; tqdm then gets a total of 0.
        with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar:
            with target_path.open("wb") as fp:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    fp.write(chunk)
                    # Advance by the bytes actually received: the last
                    # chunk is usually shorter than chunk_size.
                    p_bar.update(len(chunk))
3971
4072 def download_data (self , file_name : str , target_dir : [Path , str ], delete_old : bool = True ):
4173 """
@@ -70,21 +102,7 @@ def download_data(self, file_name: str, target_dir: [Path, str], delete_old: boo
70102 target_path = target_dir .joinpath (_target_file_name )
71103
72104 url = self .merge_remote_url (file_name )
73- resp = requests .get (url , stream = True , timeout = 60 )
74- resp .raise_for_status ()
75- if resp .status_code != 200 :
76- raise requests .exceptions .HTTPError ()
77-
78- chunk_size = 1024
79- logger .warning (
80- f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
81- )
82- logger .info (f"{ os .path .basename (file_name )} downloading......" )
83- with tqdm (total = int (resp .headers .get ("Content-Length" , 0 ))) as p_bar :
84- with target_path .open ("wb" ) as fp :
85- for chunk in resp .iter_content (chunk_size = chunk_size ):
86- fp .write (chunk )
87- p_bar .update (chunk_size )
105+ self .download (url = url , target_path = target_path )
88106
89107 self ._unzip (target_path , target_dir , delete_old )
90108 if self .delete_zip_file :
@@ -99,7 +117,9 @@ def check_dataset(self, file_name: str):
99117 return status
100118
101119 @staticmethod
102- def _unzip (file_path : Path , target_dir : Path , delete_old : bool = True ):
120+ def _unzip (file_path : [Path , str ], target_dir : [Path , str ], delete_old : bool = True ):
121+ file_path = Path (file_path )
122+ target_dir = Path (target_dir )
103123 if delete_old :
104124 logger .warning (
105125 f"will delete the old qlib data directory(features, instruments, calendars, features_cache, dataset_cache): { target_dir } "
0 commit comments