Skip to content
This repository was archived by the owner on Aug 26, 2022. It is now read-only.

Commit 71fb446

Browse files
committed
modified: plot.py
1 parent d7cc270 commit 71fb446

19 files changed

+42
-23
lines changed

__pycache__/cluster.cpython-36.pyc

0 Bytes
Binary file not shown.

cluster.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def locate_center(self, judge, maxid, threshold):
2323
# result shown in rank.png
2424
# the given dataset should be divided into 6 clusters
2525

26-
cluster_centers = list(c[0] for c in result[0:3])
26+
cluster_centers = list(c[0] for c in result[0:6])
2727
# given dataset: [1061, 1515, 400, 6, 1566, 614]
2828
# generate dataset: [642, 877, 123]
2929

generatePoints.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,22 @@
55
GENERATE_POINTS = './data/generatePoints.txt'
66

77
r = np.random.RandomState(24)
8+
o = r.randn(400, 2)
9+
o[:, 0] += 2
10+
o[:, 1] += 6
11+
u = r.randn(400, 2)
12+
u[:, 0] += 4
13+
u[:, 1] -= 0.5
14+
v = r.randn(400, 2)
15+
v[:, 0] += 7
16+
v[:, 1] -= 0.5
817
p = r.randn(400, 2)
9-
q = r.randn(400, 2) + 7
10-
s = r.randn(400, 2) + 4
18+
q = r.randn(400, 2) + 3
19+
# q[:, 0] += 3
20+
# q[:, 1] += 9
21+
s = r.randn(400, 2) + 6
1122

12-
t = np.concatenate((p, q, s), axis=0)
23+
t = np.concatenate((o, p, q, s, u, v), axis=0)
1324

1425
with open(GENERATE_POINTS, 'w', encoding='utf-8') as f:
1526
for pos in range(len(t)):
@@ -24,22 +35,25 @@
2435
distance = d(t[i], t[j])
2536
f.write(str(i) + ' ' + str(j) + ' ' + str(distance) + '\n')
2637

27-
x = p[:, 0]
28-
y = p[:, 1]
38+
# Without labels
39+
x, y = t[:, 0], t[:, 1]
2940
plt.plot(x, y, 'ok', markersize=1, alpha=0.5)
30-
# plt.show()
31-
32-
x = s[:, 0]
33-
y = s[:, 1]
34-
plt.plot(x, y, 'ok', markersize=1, alpha=0.5)
35-
36-
x = q[:, 0]
37-
y = q[:, 1]
38-
plt.plot(x, y, 'ok', markersize=1, alpha=0.5)
39-
# plt.legend()
4041
# plt.axis([-3, 10, -3, 9])
4142
plt.xlabel('x')
4243
plt.ylabel('y')
4344
plt.title('Generated Points Plot')
4445
plt.savefig('./images/generatedPoints.png')
46+
plt.close()
47+
48+
color = {0: 'c', 1: 'r', 2: 'g', 3: 'b', 4: 'm', 5: 'y'}
49+
cluster = [o, p, q, s, u, v]
50+
for i in range(len(cluster)):
51+
cur = cluster[i]
52+
x, y = cur[:, 0], cur[:, 1]
53+
plt.scatter(x, y, s=1, c=color[i], alpha=0.7, label=i + 1)
54+
plt.legend()
55+
plt.xlabel('x')
56+
plt.ylabel('y')
57+
plt.title('Generated Points with Lable')
58+
plt.savefig('./images/generatedColoredPoints.png')
4559
plt.show()

images/Cluster1 test.png

-19.8 KB
Loading

images/Cluster2 test.png

3.41 KB
Loading

images/Cluster3 test.png

-4.78 KB
Loading

images/Cluster4 test.png

30.6 KB
Loading

images/Cluster5 test.png

32.4 KB
Loading

images/Cluster6 test.png

36 KB
Loading

images/Decision Graph Cutoff test.png

32 Bytes
Loading

images/cluster_cutoff_test.png

3.15 KB
Loading

images/generatedColoredPoints.png

42.4 KB
Loading

images/generatedPoints.png

11.6 KB
Loading

images/rank cutoff test.png

-2.29 KB
Loading

images/result.png

14.2 KB
Loading

others/report.md

+8-4
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,14 @@ def classify(self, taginfo, srt_dens, min_num, maxid):
122122

123123
#### 2.4.1 测试数据
124124

125-
编写`generatePoints.py`来生成三个簇,每个簇400个点且均服从高斯分布,分布图如下所示。
125+
编写`generatePoints.py`来生成六个簇,每个簇400个点且均服从高斯分布,分布图如下所示。
126126

127127
![generatedPoints](../images/generatedPoints.png)
128128

129+
添加颜色及label如下图所示:
130+
131+
![generatedColoredPoints](../images/generatedColoredPoints.png)
132+
129133
#### 2.4.2 聚类效果
130134

131135
通过求熵值来确定截断距离最佳取值的图如下:
@@ -140,9 +144,9 @@ def classify(self, taginfo, srt_dens, min_num, maxid):
140144

141145
![rank cutoff test](../images/rank%20cutoff%20test.png)
142146

143-
截断距离选择`0.7828`为最佳值,由图能直观看出此时应该划分三个类,和生成三个簇的数据基本相符
147+
截断距离选择`0.7828`为最佳值,由图能直观看出此时应该划分六个类,和生成六个簇的数据基本相符。
144148

145-
对三个簇进行可视化,画出相应的结果如下图,黑色加粗点为聚类中心:
149+
对六个簇进行可视化,画出相应的结果如下图,黑色加粗点为聚类中心:
146150

147151
![result](../images/result.png)
148152

@@ -152,7 +156,7 @@ def classify(self, taginfo, srt_dens, min_num, maxid):
152156

153157
![cluster_cutoff_test](../images/cluster_cutoff_test.png)
154158

155-
1. 可见有一部分点被分到第-1个簇中,这是在非聚类中心点分类过程中一些距离三个聚类中心都很远的离群点,因此在可视化过程中由聚类中心生成对应的簇时,这些点会被忽略,从而导致聚类结果图中点的缺失。
159+
1. 可见有一部分点被分到第-1个簇中,这是在非聚类中心点分类过程中一些距离所有聚类中心都很远的离群点,因此在可视化过程中由聚类中心生成对应的簇时,这些点会被忽略,从而导致聚类结果图中点的缺失。
156160

157161
**对这些离群点进行有效的信息处理和聚类划分,可以是对该算法优化的下一步工作。**
158162

plot.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def main():
2323
center, tag = clust.locate_center(refer_info, maxid, threshold)
2424
taginfo = clust.classify(tag, sort_dst, min_num, maxid)
2525
print('Clustering done!')
26-
print(center) # [80, 460, 463]
26+
print(center) # [978, 1842, 1522, 438, 2077, 123]
2727

2828
# show each cluster results
2929
clust.analysis(center, taginfo, dist, maxid)
@@ -48,7 +48,7 @@ def main():
4848
for i in range(len(center)):
4949
c = coords[p[0]]
5050
try:
51-
# 标号从1开始,故i+1
51+
# 标号从1开始,故i + 1
5252
if p[1] == i + 1:
5353
plt.scatter(c[0], c[1], c=color[i], alpha=0.6, s=1)
5454
except KeyError:

report.pdf

41.6 KB
Binary file not shown.

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
def main():
99
solution = data_process.ProcessData()
1010
dist, maxid = solution.data_process(GIVEN_DATA)
11-
threshold = solution.threshold(dist, maxid)
11+
# threshold = solution.threshold(dist, maxid)
12+
threshold = 0.0456
1213
sort_dst = solution.CutOff(dist, maxid, threshold)
1314
# sort_dst = solution.Guasse(dist, maxid, threshold)
1415
min_dist, min_num = solution.min_distance(dist, sort_dst, maxid)

0 commit comments

Comments
 (0)