-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEMfunctions.py
81 lines (65 loc) · 2.45 KB
/
EMfunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
class spectralEM:
    """EM refinement of per-worker confusion matrices for crowdsourced labels.

    ``mu[i, l, c]`` is the estimated probability that worker ``i`` answers
    ``c`` for an item whose true class is ``l``.  ``q[j, l]`` is the posterior
    probability that item ``j`` belongs to class ``l``.  No class prior is
    used: the posterior is proportional to the product of the workers'
    confusion-matrix entries only.
    """

    # mu is clipped to this range before taking logs so that zero entries
    # produce a large negative number instead of -inf.
    _MU_CLIP_MIN = 1e-6
    _MU_CLIP_MAX = 10

    def __init__(self, init_mu, labels):
        """
        class initialization
        :param init_mu: initial confusion matrices, shape (M workers, K labels, K labels).
            A float copy is stored, so the caller's array is never modified;
            read the fitted values through ``output_mu()``.
        :param labels: crowdsourced labels, integer array of shape (M workers, N items);
            ``-1`` marks "worker did not label this item".
        """
        # Private float copy: M_step overwrites mu in place, which would
        # otherwise clobber the caller's array (and truncate an integer one).
        self.mu = np.array(init_mu, dtype=float)
        self.labels = labels
        self.M, self.N = labels.shape
        # Number of classes is inferred from the largest label observed.
        self.K = np.max(labels) + 1
        self.q = np.zeros((self.N, self.K))

    def _log_scores(self):
        """Return an (N, K) array: for item j and class l, the sum over the
        workers who labelled j of log mu[worker, l, label given]."""
        log_mu = np.log(np.clip(self.mu, self._MU_CLIP_MIN, self._MU_CLIP_MAX))
        scores = np.zeros((self.N, self.K))
        for worker in range(self.M):
            answers = self.labels[worker, :]
            seen = answers != -1
            # log_mu[worker, :K, answers[seen]] has shape (K, n_seen); transpose
            # so rows line up with the items this worker actually labelled.
            scores[seen] += log_mu[worker, :self.K, :][:, answers[seen]].T
        return scores

    def E_step(self):
        """Recompute the class posteriors ``q`` from the current ``mu``.

        The posterior is rebuilt from scratch on every call (it does not
        depend on the previous ``q``).  Items nobody labelled get a uniform
        posterior.
        """
        scores = self._log_scores()
        if scores.size == 0:
            return
        # Subtract each row's maximum before exponentiating: the normalised
        # result is unchanged, but exp() can no longer underflow every class
        # to zero and produce 0/0.
        scores -= scores.max(axis=1, keepdims=True)
        weights = np.exp(scores)
        self.q[:, :] = weights / weights.sum(axis=1, keepdims=True)

    def M_step(self):
        """Re-estimate every worker's confusion matrix from the current ``q``.

        ``mu[i, l, c]`` becomes the posterior-weighted count of items that
        worker ``i`` labelled ``c``, normalised over ``c``.  A row with no
        supporting evidence (total weight zero) is set to the uniform
        distribution instead of dividing by zero.
        """
        num_classes = self.K
        for worker in range(self.M):
            answers = self.labels[worker, :]
            for c in range(num_classes):
                # Sum the posteriors of the items this worker labelled c;
                # missing labels (-1) never match any c.
                self.mu[worker, :num_classes, c] = self.q[answers == c].sum(axis=0)
            rows = self.mu[worker, :num_classes, :]  # view into self.mu
            totals = rows.sum(axis=1, keepdims=True)
            empty = totals[:, 0] == 0
            rows[~empty] = rows[~empty] / totals[~empty]
            if rows.shape[1]:
                rows[empty] = 1.0 / rows.shape[1]

    def loglik(self):
        """Compute, store in ``self.loglikelihood`` and return the objective
        ``sum_j sum_l q[j, l] * sum_i log mu[i, l, labels[i, j]]``
        (missing labels are skipped)."""
        self.loglikelihood = float(np.sum(self.q * self._log_scores()))
        return self.loglikelihood

    def run(self, strategy='max_iter', max_iter=10, delta=1e-2):
        """Alternate M and E steps and return the objective after each E step.

        :param strategy: ``'max_iter'`` runs exactly ``max_iter`` iterations;
            ``'converge'`` iterates until the objective changes by at most
            ``delta`` between consecutive iterations.  Any other value
            performs only the initial E step.
        :param max_iter: iteration count used by the ``'max_iter'`` strategy.
        :param delta: stopping threshold used by the ``'converge'`` strategy.
        :return: list of objective values; the first entry is the value after
            the initial E step, followed by one entry per iteration.
        """
        self.E_step()
        history = [self.loglik()]
        num_iter = 0
        change = 1  # sentinel so 'converge' enters the loop when delta < 1
        while (strategy == 'max_iter' and num_iter < max_iter) or \
                (strategy == 'converge' and change > delta):
            self.M_step()
            self.E_step()
            history.append(self.loglik())
            num_iter += 1
            if strategy == 'converge':
                change = np.abs(history[-1] - history[-2])
        print('# iterations = ', num_iter)
        return history

    def output_mu(self):
        """Return the current confusion matrices (the internal array, not a copy)."""
        return self.mu

    def output_q(self):
        """Return the current (N, K) class posteriors (the internal array, not a copy)."""
        return self.q