-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_label_propogation_digits_active_learning.py
88 lines (65 loc) · 3.2 KB
/
plot_label_propogation_digits_active_learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# active learning technique to learn handwritten digits using label propagation.
# We start by training a label propagation model with only 10 labeled points, then we select the top five most uncertain points to label. Next, we train with 15
# labeled points (original 10 + 5 new ones). We repeat this process four times to have a model trained with 30 labeled examples.
# A plot will appear showing the top 5 most uncertain digits for each iteration of training. These may or may not contain mistakes, but we will train the next
# model with their true labels.
print(__doc__)
# Authors: Clay Woolam <[email protected]>
# Licence: BSD
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn import datasets
from sklearn.semi_supervised import label_propagation
from sklearn.metrics import classification_report, confusion_matrix
digits = datasets.load_digits()
rng = np.random.RandomState(0)
indices = np.arange(len(digits.data))
rng.shuffle(indices)
X = digits.data[indices[:330]]
y = digits.target[indices[:330]]
images = digits.images[indices[:330]]
n_total_samples = len(y)
n_labeled_points = 10
unlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]
f = plt.figure()
for i in range(5):
y_train = np.copy(y)
y_train[unlabeled_indices] = -1
lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)
lp_model.fit(X, y_train)
predicted_labels = lp_model.transduction_[unlabeled_indices]
true_labels = y[unlabeled_indices]
cm = confusion_matrix(true_labels, predicted_labels,
labels=lp_model.classes_)
print('Iteration %i %s' % (i, 70 * '_'))
print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
% (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
print(classification_report(true_labels, predicted_labels))
print("Confusion matrix")
print(cm)
# compute the entropies of transduced label distributions
pred_entropies = stats.distributions.entropy(
lp_model.label_distributions_.T)
# select five digit examples that the classifier is most uncertain about
uncertainty_index = uncertainty_index = np.argsort(pred_entropies)[-5:]
# keep track of indices that we get labels for
delete_indices = np.array([])
f.text(.05, (1 - (i + 1) * .183),
"model %d\n\nfit with\n%d labels" % ((i + 1), i * 5 + 10), size=10)
for index, image_index in enumerate(uncertainty_index):
image = images[image_index]
sub = f.add_subplot(5, 5, index + 1 + (5 * i))
sub.imshow(image, cmap=plt.cm.gray_r)
sub.set_title('predict: %i\ntrue: %i' % (
lp_model.transduction_[image_index], y[image_index]), size=10)
sub.axis('off')
# labeling 5 points, remote from labeled set
delete_index, = np.where(unlabeled_indices == image_index)
delete_indices = np.concatenate((delete_indices, delete_index))
unlabeled_indices = np.delete(unlabeled_indices, delete_indices)
n_labeled_points += 5
f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
"uncertain labels to learn with the next model.")
plt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45)
plt.show()