Add files via upload

wzy6642 · Dec 28, 2018 · 95d0dc6 · 95d0dc6
1 parent 2707576
commit 95d0dc6
Show file tree

Hide file tree

Showing 15 changed files with 1,430 additions and 0 deletions.
diff --git a/NeuralNetwork_Project1/MLP.py b/NeuralNetwork_Project1/MLP.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 14 22:21:25 2018
+
+@author: wzy
+"""
+"""
+# =============神经网络用于分类=============
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import StandardScaler
+data = [
+    [-0.017612, 14.053064, 0],[-1.395634, 4.662541, 1],[-0.752157, 6.53862, 0],[-1.322371, 7.152853, 0],[0.423363, 11.054677, 0],
+    [0.406704, 7.067335, 1],[0.667394, 12.741452, 0],[-2.46015, 6.866805, 1],[0.569411, 9.548755, 0],[-0.026632, 10.427743, 0],
+    [0.850433, 6.920334, 1],[1.347183, 13.1755, 0],[1.176813, 3.16702, 1],[-1.781871, 9.097953, 0],[-0.566606, 5.749003, 1],
+    [0.931635, 1.589505, 1],[-0.024205, 6.151823, 1],[-0.036453, 2.690988, 1],[-0.196949, 0.444165, 1],[1.014459, 5.754399, 1],
+    [1.985298, 3.230619, 1],[-1.693453, -0.55754, 1],[-0.576525, 11.778922, 0],[-0.346811, -1.67873, 1],[-2.124484, 2.672471, 1],
+    [1.217916, 9.597015, 0],[-0.733928, 9.098687, 0],[1.416614, 9.619232, 0],[1.38861, 9.341997, 0],[0.317029, 14.739025, 0]
+]
+dataMat = np.array(data)
+X = dataMat[:,0:2]
+y = dataMat[:,2]
+# 神经网络对数据尺度敏感，所以最好在训练前标准化，或者归一化，或者缩放到[-1,1]
+scaler = StandardScaler() # 标准化转换
+scaler.fit(X)  # 训练标准化对象
+X = scaler.transform(X)   # 转换数据集
+# solver='lbfgs',  MLP的求解方法：L-BFGS 在小数据上表现较好，Adam 较为鲁棒，SGD在参数调整较优时会有最佳表现（分类效果与迭代次数）；SGD表示随机梯度下降。
+# alpha:L2的参数：MLP是可以支持正则化的，默认为L2，具体参数需要调整
+# hidden_layer_sizes=(5, 2) hidden层2层,第一层5个神经元，第二层2个神经元)，2层隐藏层，也就有3层神经网络
+
+clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5,2), random_state=1)  # 神经网络输入为2，第一隐藏层神经元个数为5，第二隐藏层神经元个数为2，输出结果为2分类。
+clf.fit(X, y)
+print('每层网络层系数矩阵维度：\n',[coef.shape for coef in clf.coefs_])
+y_pred = clf.predict([[0.317029, 14.739025]])
+print('预测结果：',y_pred)
+y_pred_pro =clf.predict_proba([[0.317029, 14.739025]])
+print('预测结果概率：\n',y_pred_pro)
+
+cengindex = 0
+for wi in clf.coefs_:
+    cengindex += 1  # 表示第几层神经网络。
+    print('第%d层网络层:' % cengindex)
+    print('权重矩阵维度:',wi.shape)
+    print('系数矩阵:\n',wi)
+
+# 绘制分割区域
+x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 # 寻找每个维度的范围
+y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 # 寻找每个维度的范围
+xx1, xx2 = np.meshgrid(np.arange(x_min, x_max, 0.01),np.arange(y_min, y_max,0.01)) # 在特征范围以0.01为步长预测每一个点的输出结果
+Z = clf.predict(np.c_[xx1.ravel(), xx2.ravel()]) # 先形成待测样本的形式，再通过模型进行预测。
+Z = Z.reshape(xx1.shape) # 将输出结果转换为和网格的矩阵形式，以便绘图
+# 绘制区域网格图
+plt.pcolormesh(xx1, xx2, Z, cmap=plt.cm.Paired)
+# 绘制样本点
+plt.scatter(X[:,0],X[:,1],c=y)
+plt.show()
+
+"""
+# # =============神经网络用于回归=============
+
+import numpy as np
+from sklearn.neural_network import MLPRegressor  # 多层线性回归
+from sklearn.preprocessing import StandardScaler
+data = [
+         [ -0.017612,14.053064,14.035452],[ -1.395634, 4.662541, 3.266907],[ -0.752157, 6.53862,5.786463],[ -1.322371, 7.152853, 5.830482],
+         [0.423363,11.054677,11.47804 ],[0.406704, 7.067335, 7.474039],[0.667394,12.741452,13.408846],[ -2.46015,6.866805, 4.406655],
+         [0.569411, 9.548755,10.118166],[ -0.026632,10.427743,10.401111],[0.850433, 6.920334, 7.770767],[1.347183,13.1755,14.522683],
+         [1.176813, 3.16702,4.343833],[ -1.781871, 9.097953, 7.316082],[ -0.566606, 5.749003, 5.182397],[0.931635, 1.589505, 2.52114 ],
+         [ -0.024205, 6.151823, 6.127618],[ -0.036453, 2.690988, 2.654535],[ -0.196949, 0.444165, 0.247216],[1.014459, 5.754399, 6.768858],
+         [1.985298, 3.230619, 5.215917],[ -1.693453,-0.55754, -2.250993],[ -0.576525,11.778922,11.202397],[ -0.346811,-1.67873, -2.025541],
+         [ -2.124484, 2.672471, 0.547987],[1.217916, 9.597015,10.814931],[ -0.733928, 9.098687, 8.364759],[1.416614, 9.619232,11.035846],
+         [1.38861,9.341997,10.730607],[0.317029,14.739025,15.056054]
+]
+
+dataMat = np.array(data)
+X = dataMat[:, 0:2]  # columns 0-1: input features
+y = dataMat[:, 2]    # column 2: regression target
+# Standardize the features; the fitted scaler is reused at prediction time.
+scaler = StandardScaler()
+X = scaler.fit_transform(X)
+
+# solver='lbfgs': L-BFGS tends to do well on small datasets. alpha is the L2
+# regularization term. hidden_layer_sizes=(5, 2): two hidden layers, 5 and 2 neurons.
+clf = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
+clf.fit(X, y)
+# The model was fitted on standardized features, so scale the raw sample first.
+sample = scaler.transform([[0.317029, 14.739025]])
+print('预测结果：', clf.predict(sample))
+
+# Report the shape of each weight matrix in clf.coefs_ (numbered from 1).
+for cengindex, wi in enumerate(clf.coefs_, start=1):
+    print('第%d层网络层:' % cengindex)
+    print('权重矩阵维度:',wi.shape)
+
diff --git a/NeuralNetwork_Project1/NN.py b/NeuralNetwork_Project1/NN.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 13 16:25:47 2018
+神经网络通过调整隐藏节点数、世代数以及学习率改善训练结果
+hidden_nodes、epochs、learning_rate
+
+@author: wzy
+"""
+import numpy as np
+# scipy.special for the sigmoid function expit()
+import scipy.special
+import matplotlib.pyplot as plt
+
+"""
+类说明：构建神经网络
+
+Parameters:
+    None
+    
+Returns:
+    None
+
+Modify:
+    2018-08-13
+"""
+class neuralNetwork:
+    """Three-layer (input, hidden, output) feed-forward network with sigmoid units.
+
+    ``train`` adjusts the two weight matrices from one sample at a time;
+    ``query`` only runs the forward pass and returns the output activations.
+    """
+
+    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
+        """Store the layer sizes and learning rate and draw random initial weights.
+
+        Args:
+            inputnodes: number of input nodes.
+            hiddennodes: number of hidden nodes.
+            outputnodes: number of output nodes.
+            learningrate: factor applied to every weight update.
+        """
+        self.inodes = inputnodes
+        self.hnodes = hiddennodes
+        self.onodes = outputnodes
+        self.lr = learningrate
+        # Weight matrices are shaped (nodes in next layer, nodes in previous layer).
+        # They are sampled with mean 0.0 and standard deviation 1/sqrt(fan_in), where
+        # fan_in is the number of links INTO a node: inodes for wih, hnodes for who.
+        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
+        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))
+        # Sigmoid activation (scipy.special.expit), applied element-wise.
+        self.activation_function = scipy.special.expit
+
+    def _forward(self, inputs_list):
+        """Return (inputs, hidden_outputs, final_outputs) for one forward pass."""
+        # ndmin=2 plus transpose turns a flat list into an (n, 1) column.
+        inputs = np.array(inputs_list, ndmin=2).T
+        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
+        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
+        return inputs, hidden_outputs, final_outputs
+
+    def train(self, inputs_list, targets_list):
+        """Update both weight matrices in place from one (inputs, targets) sample."""
+        inputs, hidden_outputs, final_outputs = self._forward(inputs_list)
+        targets = np.array(targets_list, ndmin=2).T
+        # Output error is (target - actual).
+        output_errors = targets - final_outputs
+        # Split the output error back across who BEFORE who is modified below.
+        hidden_errors = np.dot(self.who.T, output_errors)
+        # Update: lr * (error * out * (1 - out)) . (previous layer outputs)^T
+        self.who += self.lr * np.dot(
+            output_errors * final_outputs * (1.0 - final_outputs), hidden_outputs.T)
+        self.wih += self.lr * np.dot(
+            hidden_errors * hidden_outputs * (1.0 - hidden_outputs), inputs.T)
+
+    def query(self, inputs_list):
+        """Run a forward pass for one input vector.
+
+        Returns the output activations; for a flat input list the shape is (onodes, 1).
+        """
+        return self._forward(inputs_list)[2]
+
+
+if __name__ == '__main__':
+    # Network shape: 784 inputs (one per pixel of a 28 x 28 image), 10 outputs
+    # (one per label value 0-9).
+    input_nodes = 784
+    hidden_nodes = 200
+    output_nodes = 10
+    learning_rate = 0.2
+    # Number of complete passes over the training records.
+    epochs = 5
+    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
+
+    # Load the training records; each CSV line is "label,pixel,pixel,...".
+    with open('mnist_dataset/mnist_train_100.csv', 'r') as training_data_file:
+        training_data_list = training_data_file.readlines()
+
+    # Train: present every record to the network once per epoch.
+    for e in range(epochs):
+        for record in training_data_list:
+            if not record.strip():
+                continue  # tolerate blank lines such as a trailing newline
+            all_values = record.split(',')
+            # Rescale pixel values from 0-255 into [0.01, 1.0].
+            # np.asfarray was removed in NumPy 2.0, so convert with
+            # np.asarray(..., dtype=float) instead.
+            inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
+            # Target vector: 0.01 everywhere except 0.99 at the index given by
+            # the record's label, all_values[0].
+            targets = np.zeros(output_nodes) + 0.01
+            targets[int(all_values[0])] = 0.99
+            n.train(inputs, targets)
+
+    # Load the test records (same CSV layout as the training file).
+    with open('mnist_dataset/mnist_test_10.csv', 'r') as test_data_file:
+        test_data_list = test_data_file.readlines()
+
+    # Evaluate: scorecard gets 1 for each correct prediction and 0 for each miss.
+    scorecard = []
+    for record in test_data_list:
+        if not record.strip():
+            continue
+        all_values = record.split(',')
+        correct_label = int(all_values[0])
+        print(correct_label, 'correct label')
+        inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
+        outputs = n.query(inputs)
+        # The index of the largest output activation is the network's answer.
+        label = np.argmax(outputs)
+        print(label, "network's answer")
+        if label == correct_label:
+            scorecard.append(1)
+        else:
+            scorecard.append(0)
+    scorecard_array = np.asarray(scorecard)
+    # Fraction of test records that were classified correctly.
+    print("performance = ", scorecard_array.sum() / scorecard_array.size)
diff --git a/NeuralNetwork_Project1/mnist_dataset/mnist_readme.txt b/NeuralNetwork_Project1/mnist_dataset/mnist_readme.txt
@@ -0,0 +1,3 @@
+These are small subsets of the MNIST data set, transformed into CSV, and made available for easy testing as your code develops.
+
+The full dataset in CSV format is available at: http://pjreddie.com/projects/mnist-in-csv/
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		These are small subsets of the MNIST data set, transformed into CSV, and made available for easy testing as your code develops.

		The full dataset in CSV format is available at: http://pjreddie.com/projects/mnist-in-csv/