minmax_scaler.py
import numpy as np
from pycompss.api.constraint import constraint
from pycompss.api.parameter import Depth, Type, COLLECTION_IN, COLLECTION_OUT
from pycompss.api.task import task
from scipy.sparse import csr_matrix, issparse
from dislib.data.array import Array
import dislib as ds


class MinMaxScaler(object):
    """ Standardize features by rescaling them to the provided range.

    Scaling happens independently on each feature by computing the relevant
    statistics on the samples in the training set. The minimum and maximum
    values are then stored and used to rescale later data with the
    transform method.

    Attributes
    ----------
    feature_range : tuple
        The desired range of values in the ds-array.
    """

    def __init__(self, feature_range=(0, 1)):
        self._feature_range = feature_range
        self.data_min_ = None
        self.data_max_ = None

    def fit(self, x):
        """ Compute the min and max values for later scaling.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)

        Returns
        -------
        self : MinMaxScaler
        """
        self.data_min_ = ds.apply_along_axis(np.min, 0, x)
        self.data_max_ = ds.apply_along_axis(np.max, 0, x)
        return self
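
    # A minimal fit sketch (hypothetical data; ds.array and collect() are
    # part of the dislib public API):
    #
    #   x = ds.array(np.array([[1., -1.], [2., 0.], [0., 10.]]),
    #                block_size=(2, 2))
    #   scaler = MinMaxScaler().fit(x)
    #   scaler.data_min_.collect()  # per-feature minima: [0., -1.]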

    def fit_transform(self, x):
        """ Fit to data, then transform it.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)

        Returns
        -------
        x_new : ds-array, shape=(n_samples, n_features)
            Scaled data.
        """
        return self.fit(x).transform(x)

    def transform(self, x):
        """ Scale data.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)

        Returns
        -------
        x_new : ds-array, shape=(n_samples, n_features)
            Scaled data.
        """
        if self.data_min_ is None or self.data_max_ is None:
            raise Exception("Model has not been initialized.")

        n_blocks = x._n_blocks[1]
        blocks = []
        min_blocks = self.data_min_._blocks
        max_blocks = self.data_max_._blocks

        # Rescale each horizontal block of x in an independent task.
        for row in x._iterator(axis=0):
            out_blocks = [None] * n_blocks
            _transform(row._blocks, min_blocks, max_blocks, out_blocks,
                       self._feature_range[0], self._feature_range[1])
            blocks.append(out_blocks)

        return Array(blocks, top_left_shape=x._top_left_shape,
                     reg_shape=x._reg_shape, shape=x.shape,
                     sparse=x._sparse)

    def inverse_transform(self, x):
        """ Returns data to its original values. The scaler must be fitted
        before using this function.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)

        Returns
        -------
        x_new : ds-array, shape=(n_samples, n_features)
            Data with its original values.
        """
        if self.data_min_ is None or self.data_max_ is None:
            raise Exception("Model has not been initialized.")

        n_blocks = x._n_blocks[1]
        blocks = []
        min_blocks = self.data_min_._blocks
        max_blocks = self.data_max_._blocks

        # Undo the scaling of each horizontal block in an independent task.
        for row in x._iterator(axis=0):
            out_blocks = [None] * n_blocks
            _inverse_transform(row._blocks, min_blocks, max_blocks,
                               out_blocks, self._feature_range[0],
                               self._feature_range[1])
            blocks.append(out_blocks)

        return Array(blocks, top_left_shape=x._top_left_shape,
                     reg_shape=x._reg_shape, shape=x.shape,
                     sparse=x._sparse)
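

# The helper tasks below run remotely under PyCOMPSs: each receives the
# blocks of one horizontal slice of the ds-array plus the fitted min/max
# statistics, and writes the rescaled column blocks into out_blocks.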
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_IN, Depth: 2},
      min_blocks={Type: COLLECTION_IN, Depth: 2},
      max_blocks={Type: COLLECTION_IN, Depth: 2},
      out_blocks=COLLECTION_OUT)
def _transform(blocks, min_blocks, max_blocks, out_blocks,
               range_min, range_max):
    x = Array._merge_blocks(blocks)
    min_val = Array._merge_blocks(min_blocks)
    max_val = Array._merge_blocks(max_blocks)

    sparse = issparse(x)

    if sparse:
        x = x.toarray()
        min_val = min_val.toarray()
        max_val = max_val.toarray()

    # Standard min-max scaling: map each feature to [0, 1], then to the
    # requested [range_min, range_max]. Constant features divide to NaN
    # and are mapped to 0 by nan_to_num.
    std_x = (x - min_val) / (max_val - min_val)
    std_x = np.nan_to_num(std_x)
    scaled_x = std_x * (range_max - range_min) + range_min

    constructor_func = np.array if not sparse else csr_matrix
    start, end = 0, 0

    # Split the scaled slice back into the original column blocks.
    for i, block in enumerate(blocks[0]):
        end += block.shape[1]
        out_blocks[i] = constructor_func(scaled_x[:, start:end])
        start += block.shape[1]
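

# _inverse_transform mirrors _transform: it maps values in
# [range_min, range_max] back to [0, 1] and then rescales them to each
# feature's original [data_min, data_max] interval.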
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_IN, Depth: 2},
      min_blocks={Type: COLLECTION_IN, Depth: 2},
      max_blocks={Type: COLLECTION_IN, Depth: 2},
      out_blocks=COLLECTION_OUT)
def _inverse_transform(blocks, min_blocks, max_blocks, out_blocks,
                       range_min, range_max):
    x = Array._merge_blocks(blocks)
    min_val = Array._merge_blocks(min_blocks)
    max_val = Array._merge_blocks(max_blocks)

    sparse = issparse(x)

    if sparse:
        x = x.toarray()
        min_val = min_val.toarray()
        max_val = max_val.toarray()

    # A degenerate range (range_max == range_min) divides to NaN; mapping
    # NaN to 1.0 places the restored value on the feature's maximum.
    x = (x - range_min) / (range_max - range_min)
    x = np.nan_to_num(x, nan=1.0)
    x = x * (max_val - min_val) + min_val

    constructor_func = np.array if not sparse else csr_matrix
    start, end = 0, 0

    # Split the restored slice back into the original column blocks.
    for i, block in enumerate(blocks[0]):
        end += block.shape[1]
        out_blocks[i] = constructor_func(x[:, start:end])
        start += block.shape[1]
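

# A minimal end-to-end sketch (hypothetical data; executing it requires the
# COMPSs runtime, since the scaler's tasks are scheduled through PyCOMPSs):
if __name__ == "__main__":
    data = np.array([[1., -1., 2.],
                     [2., 0., 0.],
                     [0., 1., -1.]])
    x = ds.array(data, block_size=(2, 2))

    scaler = MinMaxScaler(feature_range=(0, 1))
    x_scaled = scaler.fit_transform(x)
    print(x_scaled.collect())       # every column rescaled to [0, 1]

    x_restored = scaler.inverse_transform(x_scaled)
    print(x_restored.collect())     # approximately the original data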