/**
 * @file gmm.cpp
 * @author jonson ([email protected])
 * @brief A Gaussian mixture model implemented in C++
 * @version 0.1
 * @date 2021-06-12
 * @copyright Copyright (c) 2021
 */
# include "gmm.hpp"
# include "gmm_matrix_support.h"
# include <stdio.h>
# include <math.h>
# include <sys/time.h>
// Wall-clock time in seconds, used for coarse benchmarking.
inline double wall_time() {
    timeval t;
    gettimeofday(&t, NULL);
    return t.tv_sec + t.tv_usec / 1e6;
}
#ifdef GPU_VERSION
#include <cuda_runtime.h>
#else // GPU_VERSION
#include <stdlib.h>
#include <memory.h>
#endif // GPU_VERSION
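// Note: defining GPU_VERSION selects the CUDA build. In that configuration the
// model buffers and the `data` argument are presumed to live in device memory
// (the code copies `data` with cudaMemcpyDeviceToDevice), and the helpers
// declared in gmm_matrix_support.h are presumed to do their work on the GPU.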
/**
 * @brief Construct a Gaussian mixture model object
 *
 * @param dim data dimensionality
 * @param nComponent number of mixture components (clusters)
 * @param tol convergence threshold (stop once the change in log-likelihood is below tol)
 * @param maxIter maximum number of iterations
 */
GaussianMixture::GaussianMixture(int dim, int nComponent, double tol, int maxIter)
    : dim(dim), nComponent(nComponent), tol(tol), maxIter(maxIter), memoryMalloced(true)
{
#ifdef GPU_VERSION
    cudaMalloc(&this->weights, sizeof(double) * nComponent);
    cudaMalloc(&this->means, sizeof(double) * nComponent * dim);
    cudaMalloc(&this->covariances, sizeof(double) * nComponent * dim * dim);
#else // GPU_VERSION
    this->weights = (double*)malloc(sizeof(double) * nComponent);
    this->means = (double*)malloc(sizeof(double) * nComponent * dim);
    this->covariances = (double*)malloc(sizeof(double) * nComponent * dim * dim);
#endif // GPU_VERSION
}
/**
 * @brief Construct a Gaussian mixture model object (using externally managed memory)
 *
 * @param dim data dimensionality
 * @param nComponent number of mixture components (clusters)
 * @param weights memory to use for the cluster weights
 * @param means memory to use for the cluster means
 * @param covariances memory to use for the cluster covariances
 * @param tol convergence threshold (stop once the change in log-likelihood is below tol)
 * @param maxIter maximum number of iterations
 */
GaussianMixture::GaussianMixture(int dim, int nComponent, double* weights, double* means, double* covariances, double tol, int maxIter)
    : dim(dim), nComponent(nComponent), weights(weights), means(means), covariances(covariances), tol(tol), maxIter(maxIter), memoryMalloced(false)
{}
/**
 * @brief Initialize the parameters of the Gaussian mixture model
 *
 * @param data data to fit, numData rows by dim columns
 * @param numData number of data rows
 * @param xSubMuBuf scratch buffer for x - mu, numData rows by dim columns
 * @param meanBuf scratch buffer for the mean, length dim
 * @param reduceBuf scratch space needed by the parallel reductions
 */
void GaussianMixture::initParameter(const double* data, int numData, double* xSubMuBuf, double* meanBuf, double* reduceBuf) {
printf("initializing parameters\n");
double t1 = wall_time();
# ifdef GPU_VERSION
// 权重使用均匀分布初始化
double tmpWeights[this->nComponent];
for (int c = 0; c < this->nComponent; ++c) {
tmpWeights[c] = 1.0 / this->nComponent;
}
cudaMemcpy(this->weights, tmpWeights, sizeof(double) * this->nComponent, cudaMemcpyHostToDevice);
// 选择前 nComponent 个数据作为聚类均值 !! 注意,这里没使用随机法,最好把数据 shuffle 好
cudaMemcpy(this->means, data, sizeof(double) * this->nComponent * this->dim, cudaMemcpyDeviceToDevice);
matColMean(data, meanBuf, numData, this->dim, reduceBuf);
# else // GPU_VERSION
// 权重使用均匀分布初始化
for (int c = 0; c < this->nComponent; ++c) {
this->weights[c] = 1.0 / this->nComponent;
}
// 选择前 nComponent 个数据作为聚类均值 !! 注意,这里没使用随机法,最好把数据 shuffle 好
memcpy(this->means, data, sizeof(double) * this->nComponent * this->dim);
matColMean(data, meanBuf, numData, this->dim);
# endif // GPU_VERSION
matVecRowSub(data, meanBuf, xSubMuBuf, numData, this->dim);
// 使用所有数据的协方差初始化聚类协方差
dataCovariance(xSubMuBuf, this->covariances, numData, this->dim);
// 加上 minCovar 以保证最小方差
matDiagAddInplace(this->covariances, this->minCovar, this->dim);
for (int c = 1; c < this->nComponent; ++c) {
# ifdef GPU_VERSION
cudaMemcpy(this->covariances + c * this->dim * this->dim, this->covariances, sizeof(double) * this->dim * this->dim, cudaMemcpyDeviceToDevice);
# else
memcpy(this->covariances + c * this->dim * this->dim, this->covariances, sizeof(double) * this->dim * this->dim);
# endif
}
# ifdef GPU_VERSION
cudaDeviceSynchronize();
# endif
double t2 = wall_time();
printf("initializing finished in %lf seconds\n", t2 - t1);
}
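// For reference, the quantity computed per component below is the base-2 log of
// the multivariate normal density. With L the lower-triangular Cholesky factor
// of Sigma_c and y the solution of L y = x - mu_c,
//
//   log2 N(x | mu_c, Sigma_c)
//       = -0.5 * (y^T y * log2(e) + dim * log2(2 * pi) + log2|Sigma_c|),
//
// where log2|Sigma_c| = 2 * sum_i log2(L_ii). Everything is kept in base-2 logs
// to match the log2/exp2 helpers from gmm_matrix_support.h.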
/**
 * @brief Compute the log probability density of every data point under each component
 *
 * @param data data to fit, numData rows by dim columns
 * @param logDensity log-density output, nComponent rows by numData columns
 * @param numData number of data rows
 * @param lowerMatBuf scratch buffer for the lower-triangular Cholesky factor, dim rows by dim columns
 * @param xSubMuBuf scratch buffer for x - mu, numData rows by dim columns
 * @param covSolBuf scratch buffer for the solution of Ly = x - mu, numData rows by dim columns
 * @param reduceBuf scratch space needed by the parallel reductions
 */
void GaussianMixture::logProbabilityDensity(const double* data, double* logDensity, int numData, double* lowerMatBuf, double* xSubMuBuf, double* covSolBuf, double* reduceBuf) {
    for (int c = 0; c < this->nComponent; ++c) {
        // Cholesky-decompose the covariance into its lower-triangular factor.
        matCholesky(this->covariances + c * this->dim * this->dim, lowerMatBuf, this->dim);
        // The log-determinant of the covariance equals twice the sum of the logs
        // of the diagonal entries of its Cholesky factor.
#ifdef GPU_VERSION
        double covLogDet = 2 * sumLog2Diag(lowerMatBuf, this->dim, reduceBuf);
#else
        double covLogDet = 2 * sumLog2Diag(lowerMatBuf, this->dim);
#endif
        // Solve Ly = x - mu; then (x - mu)^T Sigma^(-1) (x - mu) = y^T y.
        matVecRowSub(data, this->means + c * this->dim, xSubMuBuf, numData, this->dim);
        solveLower(lowerMatBuf, xSubMuBuf, covSolBuf, this->dim, numData);
        // Assemble the log density.
        double* logDensityOfComponent = logDensity + c * numData;
        rowSumSquare(covSolBuf, logDensityOfComponent, numData, this->dim);
        // y^T y is a natural-scale quadratic form; scale it by log2(e) so that it
        // is consistent with the base-2 log terms added below.
        allMulInplace(logDensityOfComponent, M_LOG2E, numData);
        allAddInplace(logDensityOfComponent, this->dim * log2(2 * M_PI), numData);
        allAddInplace(logDensityOfComponent, covLogDet, numData);
        allMulInplace(logDensityOfComponent, -0.5, numData);
    }
}
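// fit below is standard EM. The E step converts the weighted log densities into
// responsibilities
//
//   r_{c,i} = w_c N(x_i | mu_c, Sigma_c) / sum_k w_k N(x_i | mu_k, Sigma_k),
//
// evaluated stably in log space via log-sum-exp; the mean of the per-sample
// normalizers is the average log-likelihood used for the convergence test. The
// M step then re-estimates each w_c, mu_c and Sigma_c from the
// responsibility-weighted data.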
/**
 * @brief Estimate the Gaussian mixture model parameters from data
 *
 * @param data data to fit, numData rows by dim columns
 * @param numData number of data rows
 */
void GaussianMixture::fit(const double* data, int numData) {
    // Allocate the buffers shared by the helper functions.
#ifdef GPU_VERSION
    double* xSubMu;
    cudaMalloc(&xSubMu, sizeof(double) * numData * this->dim);
    double* mean;
    cudaMalloc(&mean, sizeof(double) * this->dim);
    double* lowerMat, * covSol;
    cudaMalloc(&lowerMat, sizeof(double) * this->dim * this->dim);
    cudaMalloc(&covSol, sizeof(double) * numData * this->dim);
    double* logWeights, * logProb, * logProbSum, * responsibilities;
    cudaMalloc(&logWeights, sizeof(double) * this->nComponent);
    cudaMalloc(&logProb, sizeof(double) * this->nComponent * numData);
    cudaMalloc(&logProbSum, sizeof(double) * numData);
    responsibilities = logProb;
    // Scratch space needed by the GPU reductions.
    double* reduceBuf;
    cudaMalloc(&reduceBuf, sizeof(double) * (this->dim > this->nComponent ? this->dim : this->nComponent) * (numData + BLOCK_DIM_1D - 1) / BLOCK_DIM_1D);
#else // GPU_VERSION
    // xSubMu is used by every helper function.
    double* xSubMu = (double*)malloc(sizeof(double) * numData * this->dim);
    // initParameter
    double* mean = (double*)malloc(sizeof(double) * this->dim);
    // logProbabilityDensity
    double* lowerMat = (double*)malloc(sizeof(double) * this->dim * this->dim);
    double* covSol = (double*)malloc(sizeof(double) * numData * this->dim);
    // Logs of the cluster weights.
    double* logWeights = (double*)malloc(sizeof(double) * this->nComponent);
    // Log probability density matrix.
    double* logProb = (double*)malloc(sizeof(double) * this->nComponent * numData);
    // logProbSum holds the column sums of the matrix above.
    double* logProbSum = (double*)malloc(sizeof(double) * numData);
    // responsibilities holds the soft cluster assignments; logProb and
    // responsibilities are never live at the same time, so they share one buffer.
    double* responsibilities = logProb;
    double* reduceBuf = nullptr;
#endif // GPU_VERSION
    this->initParameter(data, numData, xSubMu, mean, reduceBuf);
    // Log-likelihood; the change between two consecutive iterations is used to
    // decide convergence.
    double logLikelihood = -INFINITY;
    for (int numIter = 0; numIter < this->maxIter; ++numIter) {
        double t1 = wall_time();
        double prevLogLikelihood = logLikelihood;
        // E step
        this->logProbabilityDensity(data, logProb, numData, lowerMat, xSubMu, covSol, reduceBuf);
        // Multiply the densities by the cluster weights, which in log space is an addition.
        allLog2(this->weights, logWeights, this->nComponent);
        matVecColAddInplace(logProb, logWeights, this->nComponent, numData);
        // Per-column log-sum-exp, used to normalize the densities.
        colLog2SumExp2(logProb, logProbSum, this->nComponent, numData);
        // Soft cluster assignment; responsibilities and logProb share the same
        // buffer, so the update is done in place.
        matVecRowSubInplace(responsibilities, logProbSum, this->nComponent, numData);
        allExp2Inplace(responsibilities, this->nComponent * numData);
        // Compute the change in log-likelihood.
#ifdef GPU_VERSION
        logLikelihood = arrMean(logProbSum, numData, reduceBuf);
#else
        logLikelihood = arrMean(logProbSum, numData);
#endif
        // Use fabs: plain abs would truncate the double difference to an int.
        double diff = fabs(logLikelihood - prevLogLikelihood);
        printf("iteration %d, log likelihood difference: %f\n", numIter, diff);
        // Check for convergence.
        if (diff < this->tol) {
            break;
        }
        // M step
        // Row sums of the responsibilities give the effective number of data
        // points assigned to each cluster.
#ifdef GPU_VERSION
        rowSum(responsibilities, this->weights, this->nComponent, numData, reduceBuf);
#else
        rowSum(responsibilities, this->weights, this->nComponent, numData);
#endif
        // Multiplying responsibilities by data gives, per cluster, the
        // responsibility-weighted sum of the data points.
        matMul(responsibilities, data, this->means, this->nComponent, numData, this->dim);
        // Divide each row of this->means by the cluster's effective point count
        // to obtain the new means.
        matPerRowDivInplace(this->means, this->weights, this->nComponent, this->dim);
        // Normalize this->weights.
        allDivInplace(this->weights, (double)numData, this->nComponent);
        // TODO: this loop could run in parallel, but each cluster would then need
        // its own xSubMu buffer.
        for (int c = 0; c < this->nComponent; ++c) {
            // resp holds every sample's responsibility for the current cluster.
            double* resp = responsibilities + c * numData;
            matVecRowSub(data, this->means + c * this->dim, xSubMu, numData, this->dim);
            dataAverageCovariance(xSubMu, resp, this->covariances + c * this->dim * this->dim, numData, this->dim);
            // Add minCovar to the diagonal to guarantee a minimum variance.
            matDiagAddInplace(this->covariances + c * this->dim * this->dim, this->minCovar, this->dim);
        }
#ifdef GPU_VERSION
        cudaDeviceSynchronize();
#endif
        double t2 = wall_time();
        printf("iteration %d finished in %lf seconds\n", numIter, t2 - t1);
    }
#ifdef GPU_VERSION
    cudaFree(xSubMu);
    cudaFree(mean);
    cudaFree(lowerMat);
    cudaFree(covSol);
    cudaFree(logWeights);
    cudaFree(logProb);
    cudaFree(logProbSum);
    cudaFree(reduceBuf);
#else // GPU_VERSION
    free(xSubMu);
    free(mean);
    free(lowerMat);
    free(covSol);
    free(logWeights);
    free(logProb);
    free(logProbSum);
#endif // GPU_VERSION
}
/**
 * @brief Destroy the Gaussian mixture model object, freeing the memory it allocated
 */
GaussianMixture::~GaussianMixture() {
    if (this->memoryMalloced) {
#ifdef GPU_VERSION
        cudaFree(this->weights);
        cudaFree(this->means);
        cudaFree(this->covariances);
#else
        free(this->weights);
        free(this->means);
        free(this->covariances);
#endif
    }
}
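// A minimal usage sketch (hypothetical driver, not part of this project). It
// assumes `samples` points to numData * dim doubles in row-major order (device
// memory when GPU_VERSION is defined, host memory otherwise) and that the fitted
// parameters are readable through the class afterwards:
//
//   GaussianMixture gmm(/*dim=*/3, /*nComponent=*/4, /*tol=*/1e-4, /*maxIter=*/100);
//   gmm.fit(samples, numData);
//   // The weights, means and covariances buffers now hold the fitted mixture.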