utils.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# etips
#
# Copyright (c) Siemens AG, 2020
# Authors:
# Zhiliang Wu <[email protected]>
# License-Identifier: MIT
import random
import gzip
import pickle
from pathlib import Path

import numpy as np
import tensorflow as tf


class InvokeTimes(object):
    """A helper class to keep track of how many times a function is invoked."""

    def __init__(self, init=0):
        self.number = init

    def add_one(self):
        self.number += 1


def fix_random_seed(n=0):
    """Fix the random seeds to facilitate reproducibility.

    Args:
        n (int): the random seed

    Returns:
        None. The random number generators of `random`, `numpy` and
        `tensorflow` are all seeded with `n`.
    """
    random.seed(n)
    np.random.seed(n)
    tf.random.set_random_seed(n)
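    # Note: `tf.random.set_random_seed` is the TensorFlow 1.x API; the
    # TensorFlow 2.x equivalent is `tf.random.set_seed`.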


def convert_to_onehot(target, n_classes=None):
    """Convert categorical integers into a one-hot encoded array.

    Args:
        target (np.ndarray): categorical integers, e.g. 0, 1, ..., 9
        n_classes (int): the number of possible classes

    Returns:
        onehot_target (np.ndarray): one-hot encoded array,
            shape of `(target.size, #classes)`
    """
    if n_classes is None:
        onehot_target = np.zeros((target.size, int(target.max()) + 1))
    else:
        onehot_target = np.zeros((target.size, n_classes))
    onehot_target[np.arange(target.size), target.astype(int)] = 1
    return onehot_target
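    # For example (illustrative values, not taken from any dataset):
    # convert_to_onehot(np.array([1, 0, 2]), n_classes=3) returns
    # [[0., 1., 0.], [1., 0., 0.], [0., 0., 1.]].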


def load_counting_data(fp=Path('./data/'), fn='Dataset_10k.pickle'):
    """Load and preprocess the counting dataset.

    Args:
        fp (pathlib.Path): path to the dataset directory
        fn (str): name of the dataset file

    Returns:
        x_data, y_target (np.ndarray): shape of (#samples, #timesteps, #features): (10,000, ?, 784)
    """
    with gzip.open(fp / fn, 'rb') as f:
        data = pickle.load(f)
    tensor, target, sequences = data
    y_target = convert_to_onehot(target=target)
    x_data = tensor[:, :, 1:]
    x_data /= 255
    return x_data, y_target


def load_mnist_data():
    """Load and preprocess the MNIST dataset.

    Returns:
        x_data, y_target (np.ndarray): shape of (#samples, #timesteps, #features): (70,000, 28, 28)
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_data = np.concatenate([x_train, x_test], axis=0)
    x_data = x_data.astype('float') / 255
    y_target = np.concatenate([y_train, y_test], axis=0)
    y_target = convert_to_onehot(y_target)
    return x_data, y_target


def load_bandit_data(fp=Path('./data/'), fn='Bandit_1.pickle'):
    """Load and unpack the bandit dataset.

    Args:
        fp (pathlib.Path): path to the dataset directory
        fn (str): name of the dataset file

    Returns:
        x (np.ndarray): context, e.g. a sequence of images, shape of (#samples, #timesteps, #features)
        y (np.ndarray): ground-truth label for the context x, one-hot encoded, (#samples, #classes)
        a (np.ndarray): action taken according to some policy, one-hot encoded, (#samples, #classes)
        scores (np.ndarray): generalized propensity score of the observed action, (#samples, )
        delta (np.ndarray): loss of the corresponding action, 0 is no loss (correct action) while
            1 is high loss (wrong action), (#samples, )
    """
    with gzip.open(fp / fn, 'rb') as f:
        data = pickle.load(f)
    x, y, actions, scores, delta = data
    a = convert_to_onehot(actions)
    return x, y, a, scores, delta


def load_ebandit_data(fp=Path('./data/'), fn='eBandit_1.pickle'):
    """Load and unpack the bandit dataset with estimated propensity scores.

    Args:
        fp (pathlib.Path): path to the dataset directory
        fn (str): name of the dataset file

    Returns:
        x (np.ndarray): context, e.g. a sequence of images, shape of (#samples, #timesteps, #features)
        y (np.ndarray): ground-truth label for the context x, one-hot encoded, (#samples, #classes)
        a (np.ndarray): action taken according to some policy, one-hot encoded, (#samples, #classes)
        delta (np.ndarray): loss of the corresponding action, 0 is no loss (correct action) while
            1 is high loss (wrong action), (#samples, )
        scores_hat (np.ndarray): estimated propensity score of the observed action, (#samples, )
    """
    with gzip.open(fp / fn, 'rb') as f:
        data = pickle.load(f)
    x, y, actions, delta, scores_hat = data
    a = convert_to_onehot(actions)
    return x, y, a, delta, scores_hat


if __name__ == '__main__':
    # x, y = load_counting_data(fp=Path('./data/'), fn='Dataset_10k.pickle')
    # x, y = load_mnist_data()
    # x, y, a, s, d = load_bandit_data(fp=Path('./data/'), fn='Bandit_1.pickle')
    x, y, a, d, s_hat = load_ebandit_data(fp=Path('../Dataset/'),
                                          fn='eBandit_1.pickle')
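
    # A minimal sanity check of `convert_to_onehot` (illustrative values only,
    # not taken from any of the datasets above):
    demo = convert_to_onehot(np.array([1, 0, 2]), n_classes=3)
    print(demo.shape)  # prints (3, 3): one row per sample, one column per class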