#ifndef NEON_CONV_ALGS_H
#define NEON_CONV_ALGS_H
#include <arm_neon.h>
#include <iostream>
#include <cstdint>
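#include <cmath>   // std::sqrt, used only by the illustrative reference sketches below
#include <cstring> // std::memcpy, used only by the illustrative reference sketches below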
#include "../Eigen/Core"
using Matrix4f = Eigen::Matrix<float, 4, 4>; //!< 4x4 float matrix
using Matrix5f = Eigen::Matrix<float, 5, 5>; //!< 5x5 float matrix
using Matrix5u = Eigen::Matrix<uint32_t, 5, 5>; //!< 5x5 uint32 matrix
using Matrix8f = Eigen::Matrix<float, 8, 8>; //!< 8x8 float matrix
using Matrix12f = Eigen::Matrix<float, 12, 12>; //!< 12x12 float matrix
using Matrix24f = Eigen::Matrix<float, 24, 24>; //!< 24x24 float matrix
using Matrix28f = Eigen::Matrix<float, 28, 28>; //!< 28x28 float matrix
using MatrixO = Eigen::Matrix<float, 1, 10>; //!< 1x10 float matrix
using MatrixFCi = Eigen::Matrix<float, 1, 50>; //!< 1x50 float matrix
using MatrixXf = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>; //!< Dynamic float matrix
using MatrixFC1 = Eigen::Matrix<float, 50, 320, Eigen::RowMajor>; //!< 50x320 float matrix stored in row-major order
using MatrixFC2 = Eigen::Matrix<float, 10, 50, Eigen::RowMajor>; //!< 10x50 float matrix stored in row-major order
/**
* Use: convolve a 28x28 matrix (MNIST image size) with a 5x5 weight, no padding, stride 1
* Adapted code from ARM Compute Library
* NOTE: convolution is implemented as follows per block (Haskell notation)
* bias + sum . (coeffMul kernel) input5x5Block
*
* @param[in] input 28x28 input (Eigen matrix)
* @param[in] kernel 5x5 weight (Eigen matrix)
* @param[in] bias Value of bias
* @return 24x24 convolution result
*/
Matrix24f convolve28(const Matrix28f& input, const Matrix5f& kernel, const float bias);
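// Illustrative scalar sketch of the per-block formula above (not the NEON
// implementation and not part of the library API): for every output element,
// multiply the 5x5 input block coefficient-wise with the kernel, sum, and add
// the bias. The name convolve28ScalarSketch is made up for this example.
inline Matrix24f convolve28ScalarSketch(const Matrix28f& input, const Matrix5f& kernel, const float bias)
{
    Matrix24f out;
    for (int r = 0; r < 24; ++r)
        for (int c = 0; c < 24; ++c)
            // bias + sum(kernel .* 5x5 block anchored at (r, c))
            out(r, c) = bias + input.block<5, 5>(r, c).cwiseProduct(kernel).sum();
    return out;
}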
/**
* Use: convolve a 12x12 matrix with a 5x5 weight, no padding, stride 1
* Adapted code from ARM Compute Library
* see "convolve28" notes for algorithm details
*
* @param[in] input 12x12 input (Eigen matrix)
* @param[in] kernel 5x5 weight (Eigen matrix)
* @param[in] bias Value of bias
* @return 8x8 convolution result
*/
Matrix8f convolve12(const Matrix12f& input, const Matrix5f& kernel, const float bias);
/**
* Use: 2x2 max-pooling of a 24x24 matrix, stride == 2
*
* @param[in] mat 24x24 input (Eigen matrix)
* @return 12x12 max-pool result
*/
Matrix12f maxPool24(const Matrix24f& mat);
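// Illustrative scalar sketch of the 2x2 / stride-2 max-pooling above (not the
// NEON implementation): each output element is the maximum of a disjoint 2x2
// input block. maxPool24ScalarSketch is a made-up name for this example.
inline Matrix12f maxPool24ScalarSketch(const Matrix24f& mat)
{
    Matrix12f out;
    for (int r = 0; r < 12; ++r)
        for (int c = 0; c < 12; ++c)
            out(r, c) = mat.block<2, 2>(2 * r, 2 * c).maxCoeff(); // max of the 2x2 block
    return out;
}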
/**
* Use: 2x2 max-pooling of an 8x8 matrix, stride == 2
*
* @param[in] mat 8x8 input (Eigen matrix)
* @return 4x4 max-pool result
*/
Matrix4f maxPool8(const Matrix8f& mat);
/**
* Use: In-place ReLU on a 12x12 matrix
*
* @param[in,out] mat 12x12 Eigen matrix
*/
void relu12(Matrix12f& mat);
/**
* Use: In-place ReLU on a 4x4 matrix
*
* @param[in,out] mat 4x4 Eigen matrix
*/
void relu4(Matrix4f& mat);
/**
* Use: In-place ReLU on a 1x50 matrix
*
* @param[in,out] mat 1x50 Eigen matrix
*/
void reluFC1(MatrixFCi& mat);
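// Illustrative NEON sketch of an in-place ReLU over a contiguous float buffer
// (element order is irrelevant for an elementwise max with zero), roughly how
// relu12/relu4/reluFC1 can be vectorised. reluBufferSketch is a made-up name,
// not the declared API. Usage idea: reluBufferSketch(mat.data(), 144) for a
// 12x12 matrix.
inline void reluBufferSketch(float* data, int count)
{
    const float32x4_t zero = vdupq_n_f32(0.0f);
    int i = 0;
    for (; i + 4 <= count; i += 4)                                  // 4 lanes per iteration
        vst1q_f32(data + i, vmaxq_f32(vld1q_f32(data + i), zero));
    for (; i < count; ++i)                                          // scalar tail (e.g. 1x50)
        data[i] = data[i] > 0.0f ? data[i] : 0.0f;
}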
/**
* Use: In-place batch normalization on a 12x12 matrix
* Formula: (x - E[x]) * gamma / sqrt(Var[x] + eps) + beta
*
* @param[in,out] mat 12x12 Eigen matrix
* @param[in] mean Mean (E[x])
* @param[in] var Variance (Var[x])
* @param[in] eps Epsilon (small positive constant for numerical stability)
* @param[in] gamma Gamma
* @param[in] beta Beta
*/
void batchNorm(Matrix12f& mat, float mean, float var, float eps, float gamma, float beta);
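// Illustrative Eigen-only sketch of the batch-norm formula above, applied
// elementwise (not the NEON implementation). batchNormScalarSketch is a
// made-up name for this example.
inline void batchNormScalarSketch(Matrix12f& mat, float mean, float var, float eps, float gamma, float beta)
{
    const float scale = gamma / std::sqrt(var + eps);      // gamma / sqrt(Var[x] + eps)
    mat = ((mat.array() - mean) * scale + beta).matrix();  // (x - E[x]) * scale + beta
}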
/**
* Use: Forward propagation through a fully connected layer of 50 neurons with 320 inputs
*
* @param[in] inputs C-style array containing twenty 4x4 matrices
* @param[in] layerWeight 50x320 weight matrix for the whole layer (one row per neuron)
* @return 1x50 Matrix as layer output (one element per neuron)
*/
MatrixFCi fullyConnect1(const Matrix4f (&inputs)[20], const MatrixFC1& layerWeight);
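// Illustrative Eigen-only sketch of the fully connected layer above: flatten
// the twenty 4x4 inputs into a 1x320 row vector and multiply by the transposed
// 50x320 weights (one dot product per neuron). The flattening order used here
// (matrix 0 first, row-major within each matrix) is an assumption for the
// example only; fullyConnect1 may expect a different layout. The name
// fullyConnect1Sketch is made up.
inline MatrixFCi fullyConnect1Sketch(const Matrix4f (&inputs)[20], const MatrixFC1& layerWeight)
{
    Eigen::Matrix<float, 1, 320> flat;
    for (int m = 0; m < 20; ++m)
        for (int r = 0; r < 4; ++r)
            for (int c = 0; c < 4; ++c)
                flat(0, m * 16 + r * 4 + c) = inputs[m](r, c); // append element (r, c) of matrix m
    return flat * layerWeight.transpose();                     // (1x320) * (320x50) = 1x50
}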
/**
* Use: Forward propagation through a fully connected layer of 10 neurons with 50 inputs
*
* @param[in] input 1x50 input matrix
* @param[in] layerWeight 10x50 weight matrix for the whole layer (one row per neuron)
* @return 1x10 Matrix as layer output (one element per neuron)
*/
MatrixO fullyConnect2(const MatrixFCi& input, const MatrixFC2& layerWeight);
/**
* Use: Add ten 8x8 input matrices
*
* @param[in] inputs C-style array containing ten 8x8 matrices
* @return Sum of matrices
*/
Matrix8f addTen8x8(const Matrix8f (&inputs)[10]);
/**
* Use: Add the FC1 bias in place to the FC1 result
*
* @param[in,out] mat 1x50 Eigen matrix holding the FC1 result
* @param[in] bias 50-element FC1 bias as a C-style array
* @return The updated 1x50 matrix
*/
MatrixFCi addFC1Bias(MatrixFCi& mat, const float *bias);
/**
* Use: Ternary convolution of a 28x28 matrix without multiplications
*
* @param[in] mat 28x28 input
* @param[in] neg 5x5 XOR kernel
* @param[in] zer 5x5 AND kernel
* @param[in] bias convolution bias
* @return 24x24 convolution result
*/
Matrix24f ternary28Conv(const Matrix28f& mat, const Matrix5u& neg, const Matrix5u& zer, const float bias);
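// Illustrative scalar sketch of the multiplication-free ternary convolution
// above, assuming kernel weights in {-1, 0, +1} encoded as bit masks: neg holds
// 0x80000000 where the weight is -1 (XOR flips the float sign bit) and zer
// holds 0xFFFFFFFF where the weight is non-zero (AND keeps the value, or clears
// it to +0.0f). This mask convention and the name ternary28ConvSketch are
// assumptions for the example; check the implementation for the exact encoding.
inline Matrix24f ternary28ConvSketch(const Matrix28f& mat, const Matrix5u& neg, const Matrix5u& zer, const float bias)
{
    Matrix24f out;
    for (int r = 0; r < 24; ++r)
        for (int c = 0; c < 24; ++c)
        {
            float acc = bias;
            for (int kr = 0; kr < 5; ++kr)
                for (int kc = 0; kc < 5; ++kc)
                {
                    const float v = mat(r + kr, c + kc);
                    uint32_t bits;
                    std::memcpy(&bits, &v, sizeof bits);        // reinterpret float as bits
                    bits = (bits ^ neg(kr, kc)) & zer(kr, kc);  // sign flip, then keep/clear
                    float term;
                    std::memcpy(&term, &bits, sizeof term);
                    acc += term;                                // only adds, no multiplies
                }
            out(r, c) = acc;
        }
    return out;
}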
#endif // NEON_CONV_ALGS_H