-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathUndersampling.py
52 lines (33 loc) · 1.53 KB
/
Undersampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
############################## Undersampling to address data imbalance ######################
# On train datasets.
from collections import Counter # Important library

import numpy as np
from imblearn.over_sampling import RandomOverSampler # Important library
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
# Seeded sampler so the random selection is reproducible across runs.
rus = RandomUnderSampler(random_state=42)
# Resampling training datasets.
# fit_resample works on a 2-D (n_samples, n_features) matrix, so every sample
# is flattened to one row. The row width is inferred from the data (-1)
# instead of being hard-coded to 224*224*3.
new_X = np.asarray(new_X)
trainarray = new_X.reshape(len(new_X), -1)
# Use fit_resample.
trainarray_rus, trainlabel_rus = rus.fit_resample(trainarray, new_y)
# Reshape the kept rows back to the per-sample dims of the input. The right-hand
# side is evaluated first, so new_X.shape still describes the array as it
# was before resampling.
new_X = trainarray_rus.reshape((-1, *new_X.shape[1:]))
new_y = trainlabel_rus
###########################################################################################
# Seeded sampler so the random selection is reproducible across runs.
rus = RandomUnderSampler(random_state=42)
# Resampling valid datasets.
# NOTE(review): undersampling changes the class distribution of the validation
# split, so scores computed on it describe a balanced set rather than the
# original data — confirm this is intended.
# fit_resample works on a 2-D (n_samples, n_features) matrix, so every sample
# is flattened to one row. The row width is inferred from the data (-1)
# instead of being hard-coded to 224*224*3.
new_X_valid = np.asarray(new_X_valid)
validarray = new_X_valid.reshape(len(new_X_valid), -1)
# Use fit_resample.
validarray_rus, validlabel_rus = rus.fit_resample(validarray, new_y_valid)
# Reshape the kept rows back to the per-sample dims of the input. The right-hand
# side is evaluated first, so new_X_valid.shape still describes the array as it
# was before resampling.
new_X_valid = validarray_rus.reshape((-1, *new_X_valid.shape[1:]))
new_y_valid = validlabel_rus
###########################################################################################
# On test datasets.
# Seeded sampler so the random selection is reproducible across runs.
rus = RandomUnderSampler(random_state=42)
# Resampling test datasets.
# NOTE(review): undersampling changes the class distribution of the test
# split, so metrics computed on it describe a balanced set rather than the
# original data — confirm this is intended.
# fit_resample works on a 2-D (n_samples, n_features) matrix, so every sample
# is flattened to one row. The row width is inferred from the data (-1)
# instead of being hard-coded to 224*224*3.
new_X_test = np.asarray(new_X_test)
testarray = new_X_test.reshape(len(new_X_test), -1)
# Use fit_resample.
testarray_rus, testlabel_rus = rus.fit_resample(testarray, new_y_test)
# Reshape the kept rows back to the per-sample dims of the input. The right-hand
# side is evaluated first, so new_X_test.shape still describes the array as it
# was before resampling.
new_X_test = testarray_rus.reshape((-1, *new_X_test.shape[1:]))
new_y_test = testlabel_rus