-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathload_data.py
113 lines (91 loc) · 2.62 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import sys
import json
import os
from urllib.request import URLopener
import tarfile
import pandas as pd
# Directory (relative to the current working directory) in which the trial
# files are looked up: one ".csv" and one ".json" file per trial code.
DATA_HOME = "GaitData"
# JSON file containing the list of all trial codes.
CODE_LIST_FNAME = "code_list.json"
# NOTE(review): the first URL below is dead -- it is overridden immediately by
# the second assignment, which is the only value the rest of the module sees.
DOWNLOAD_URL = "http://dev.ipol.im/~truong/GaitData.tar.gz"
DOWNLOAD_URL = "https://mycore.core-cloud.net/index.php/s/Yq3QkYO0LAvAsLv/download"
# Local file name under which the downloaded archive is stored before being
# extracted and deleted.
ARCHIVE_FNAME = "GaitData.tar.gz"
def download_data():
    """Download the data set, extract it and remove the archive.

    Nothing is done when the ``DATA_HOME`` path already exists. Otherwise
    the archive found at ``DOWNLOAD_URL`` is saved as ``ARCHIVE_FNAME``,
    extracted into the current working directory and finally deleted.
    Progress messages are printed to standard output.

    Raises
    ------
    OSError
        If the download, the extraction or the removal of the archive fails
        (``urllib.error.URLError`` and ``tarfile`` I/O errors derive from it).
    tarfile.TarError
        If the downloaded file is not a valid archive, or if one of its
        members is rejected by the extraction filter.
    """
    # Local import: `urlretrieve` replaces the deprecated `URLopener` class,
    # which emits a DeprecationWarning as soon as it is instantiated.
    from urllib.request import urlretrieve

    if os.path.exists(DATA_HOME):
        return

    print("Data are missing. Downloading them now...", end="", flush=True)
    urlretrieve(DOWNLOAD_URL, ARCHIVE_FNAME)
    print("Ok.")

    print("Extracting now...", end="", flush=True)
    # The context manager closes the archive even if the extraction fails.
    with tarfile.open(ARCHIVE_FNAME) as archive:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: when the running Python
            # supports extraction filters, refuse members that would be
            # written outside of the destination directory.
            archive.extractall(filter="data")
        else:
            archive.extractall()
    print("Ok.")

    print("Removing the archive...", end="", flush=True)
    os.remove(ARCHIVE_FNAME)
    print("Ok.")
def get_filename(code):
    """Return the extension-less path shared by the files of a trial.

    The signal file and the metadata file of a trial are obtained by
    appending ".csv" and ".json" respectively to the returned value.

    Parameters
    ----------
    code : str
        Code of the trial ("Patient-Trial").

    Returns
    -------
    str
        Path of the trial inside ``DATA_HOME``, without extension.

    Raises
    ------
    ValueError
        If `code` does not consist of exactly two parts separated by "-",
        or if one of the parts cannot be parsed as an integer.
    AssertionError
        If no ".csv" file exists for `code`.
    """
    subject_str, trial_str = code.split("-")
    # The integer values are not needed; the conversions are kept only
    # because they reject malformed codes with a ValueError.
    int(subject_str)
    int(trial_str)

    filename = os.path.join(DATA_HOME, code)
    if not os.path.exists(filename + ".csv"):
        # Raised explicitly instead of through an `assert` statement so that
        # the check is not stripped when Python runs with -O. The exception
        # type stays the one callers already get.
        raise AssertionError(
            "The code {} cannot be found in the data set.".format(code))
    return filename
def load_trial(code):
    """Load the signal recorded during a trial.

    Parameters
    ----------
    code : str
        Code of the trial ("Patient-Trial").

    Returns
    -------
    pandas.DataFrame
        Content of the ".csv" file of the trial as parsed by
        ``pandas.read_csv``; documented upstream as having shape
        (n_sample, n_dimension).
    """
    csv_path = get_filename(code) + ".csv"
    return pd.read_csv(csv_path, sep=",")
def load_metadata(code):
    """Return the metadata of the trial.

    Parameters
    ----------
    code : str
        Code of the trial ("Patient-Trial").

    Returns
    -------
    dict
        Metadata dictionary, decoded from the ".json" file of the trial.
    """
    json_path = get_filename(code) + ".json"
    # JSON is UTF-8 by specification: decode it explicitly rather than with
    # the platform-dependent default encoding of `open`.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)
def get_code_list():
    """Return the list of all available codes.

    The codes are read from the JSON file named by ``CODE_LIST_FNAME``,
    which is resolved relative to the current working directory.

    Returns
    -------
    list
        List of codes.
    """
    # JSON is UTF-8 by specification: decode it explicitly rather than with
    # the platform-dependent default encoding of `open`.
    with open(CODE_LIST_FNAME, "r", encoding="utf-8") as f:
        return json.load(f)
if __name__ == "__main__":
    # Example usage: make sure the data set is present locally, then load
    # the signal and the metadata of every trial in turn.
    download_data()
    trial_codes = get_code_list()
    n_trials = len(trial_codes)
    print("There are {} trials.".format(n_trials))
    for trial_code in trial_codes:
        signal = load_trial(trial_code)
        metadata = load_metadata(trial_code)
        # Do something.
        # ...