@@ -80,10 +80,11 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
80
80
// Get Strides.
81
81
SmallVector<int64_t , 2 > strides = {1 , 1 };
82
82
if (op->hasAttr (" strides" )) {
83
- strides.clear ();
84
- for (auto value : op->getAttrOfType <mlir::DenseIntElementsAttr>(" strides" ).getValues <int64_t >()) {
85
- strides.push_back (value);
86
- }
83
+ strides.clear ();
84
+ for (auto value : op->getAttrOfType <mlir::DenseIntElementsAttr>(" strides" )
85
+ .getValues <int64_t >()) {
86
+ strides.push_back (value);
87
+ }
87
88
}
88
89
bool stride1 = strides[0 ] != 1 ;
89
90
bool stride2 = strides[1 ] != 1 ;
@@ -93,14 +94,17 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
93
94
// Get Dilations.
94
95
SmallVector<int64_t , 2 > dilations = {1 , 1 };
95
96
if (op->hasAttr (" dilations" )) {
96
- dilations.clear ();
97
- for (auto value : op->getAttrOfType <mlir::DenseIntElementsAttr>(" dilations" ).getValues <int64_t >()) {
98
- dilations.push_back (value);
99
- }
97
+ dilations.clear ();
98
+ for (auto value :
99
+ op->getAttrOfType <mlir::DenseIntElementsAttr>(" dilations" )
100
+ .getValues <int64_t >()) {
101
+ dilations.push_back (value);
102
+ }
100
103
}
101
104
bool dilated1 = dilations[0 ] != 1 ;
102
105
bool dilated2 = dilations[1 ] != 1 ;
103
- Value dilHeight = rewriter.create <arith::ConstantIndexOp>(loc, dilations[0 ]);
106
+ Value dilHeight =
107
+ rewriter.create <arith::ConstantIndexOp>(loc, dilations[0 ]);
104
108
Value dilWidth = rewriter.create <arith::ConstantIndexOp>(loc, dilations[1 ]);
105
109
106
110
// Get i1 as the element type for mask vector.
@@ -115,7 +119,7 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
115
119
const Value c1 = rewriter.create <arith::ConstantIndexOp>(loc, 1 );
116
120
const Value c2 = rewriter.create <arith::ConstantIndexOp>(loc, 2 );
117
121
const Value c3 = rewriter.create <arith::ConstantIndexOp>(loc, 3 );
118
- const Value vl_step = rewriter.create <arith::ConstantIndexOp>(loc, vecsize);
122
+ const Value vlStep = rewriter.create <arith::ConstantIndexOp>(loc, vecsize);
119
123
const Value zero =
120
124
buddy::insertZeroConstantOp (ctx, rewriter, loc, elementTy);
121
125
@@ -136,12 +140,11 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
136
140
Value width_o = rewriter.create <memref::DimOp>(loc, output, c2);
137
141
138
142
// Calculate the upper bound for vectorized processing
139
- // - Subtract `vl_step ` is to avoid overflow at the vectorization tail.
143
+ // - Subtracting `vlStep` avoids overflow at the vectorization tail.
140
144
// - Add 1 to ensure the final loop runs when the workload length
141
145
// is divisible by the vector size.
142
- Value upperBound_tmp =
143
- rewriter.create <arith::SubIOp>(loc, channels, vl_step);
144
- Value upperBound = rewriter.create <arith::AddIOp>(loc, upperBound_tmp, c1);
146
+ Value upperBoundTmp = rewriter.create <arith::SubIOp>(loc, channels, vlStep);
147
+ Value upperBound = rewriter.create <arith::AddIOp>(loc, upperBoundTmp, c1);
145
148
146
149
SmallVector<Value, 8 > lowerBounds (4 , c0);
147
150
SmallVector<Value, 8 > uperBounds{batch, height_o, width_o, f_o};
@@ -150,20 +153,20 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
150
153
rewriter, loc, lowerBounds, uperBounds, steps,
151
154
[&](OpBuilder &builder, Location loc, ValueRange ivs) {
152
155
// Create strides variables.
153
- Value tmp_ivs1 = ivs[1 ];
154
- if (stride1){
155
- tmp_ivs1 = builder.create <arith::MulIOp>(loc, ivs[1 ], strHeight);
156
+ Value tmpIvs1 = ivs[1 ];
157
+ if (stride1) {
158
+ tmpIvs1 = builder.create <arith::MulIOp>(loc, ivs[1 ], strHeight);
156
159
}
157
- Value tmp_ivs2 = ivs[2 ];
158
- if (stride2){
159
- tmp_ivs2 = builder.create <arith::MulIOp>(loc, ivs[2 ], strWidth);
160
+ Value tmpIvs2 = ivs[2 ];
161
+ if (stride2) {
162
+ tmpIvs2 = builder.create <arith::MulIOp>(loc, ivs[2 ], strWidth);
160
163
}
161
164
Value tmp_result = builder.create <memref::LoadOp>(
162
165
loc, elementTy, output,
163
166
ValueRange{ivs[0 ], ivs[1 ], ivs[2 ], ivs[3 ]});
164
167
// Create vecsize mining loop.
165
168
auto iter_val = builder.create <scf::ForOp>(
166
- loc, c0, upperBound, /* Step=*/ vl_step , ValueRange{c0, tmp_result},
169
+ loc, c0, upperBound, /* Step=*/ vlStep , ValueRange{c0, tmp_result},
167
170
[&](OpBuilder &nestedBuilder, Location nestedLoc, Value iv,
168
171
ValueRange itrArgs) {
169
172
auto tmp0 = nestedBuilder.create <affine::AffineForOp>(
@@ -173,23 +176,27 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
173
176
[&](OpBuilder &builder, Location loc, Value iv0,
174
177
ValueRange itrArgs0) {
175
178
// Create dilated[0] variables.
176
- Value tmp_ivs3 = iv0;
177
- if (dilated1){
178
- tmp_ivs3 = builder.create <arith::MulIOp>(loc, iv0, dilHeight);
179
+ Value tmpIvs3 = iv0;
180
+ if (dilated1) {
181
+ tmpIvs3 =
182
+ builder.create <arith::MulIOp>(loc, iv0, dilHeight);
179
183
}
180
- Value inputHeight = builder.create <arith::AddIOp>(loc, tmp_ivs1, tmp_ivs3);
184
+ Value inputHeight =
185
+ builder.create <arith::AddIOp>(loc, tmpIvs1, tmpIvs3);
181
186
auto tmp1 = builder.create <affine::AffineForOp>(
182
187
loc, ValueRange{c0}, builder.getDimIdentityMap (),
183
188
ValueRange{width_k}, builder.getDimIdentityMap (),
184
189
/* Step=*/ 1 , ValueRange{itrArgs0[0 ]},
185
190
[&](OpBuilder &builder, Location loc, Value iv1,
186
191
ValueRange itrArgs1) {
187
192
// Create dilated[1] variables.
188
- Value tmp_ivs4 = iv1;
189
- if (dilated2){
190
- tmp_ivs4 = builder.create <arith::MulIOp>(loc, iv1, dilWidth);
193
+ Value tmpIvs4 = iv1;
194
+ if (dilated2) {
195
+ tmpIvs4 = builder.create <arith::MulIOp>(loc, iv1,
196
+ dilWidth);
191
197
}
192
- Value inputWidth = builder.create <arith::AddIOp>(loc, tmp_ivs2, tmp_ivs4);
198
+ Value inputWidth = builder.create <arith::AddIOp>(
199
+ loc, tmpIvs2, tmpIvs4);
193
200
Value inputVector = builder.create <vector::LoadOp>(
194
201
loc, vectorTy, input,
195
202
ValueRange{ivs[0 ], inputHeight, inputWidth,
@@ -226,7 +233,7 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
226
233
nestedLoc, tmp1.getResult (0 ));
227
234
});
228
235
Value idx =
229
- builder.create <arith::AddIOp>(loc, itrArgs[0 ], vl_step );
236
+ builder.create <arith::AddIOp>(loc, itrArgs[0 ], vlStep );
230
237
builder.create <scf::YieldOp>(
231
238
loc, ValueRange{idx, tmp0.getResult (0 )});
232
239
});
@@ -250,25 +257,27 @@ class Conv2dNhwcFhwcVectorizationPattern : public ConversionPattern {
250
257
[&](OpBuilder &builder, Location loc, Value iv0,
251
258
ValueRange itrArgs0) {
252
259
// Create dilated[0] variables.
253
- Value tmp_ivs3 = iv0;
254
- if (dilated1){
255
- tmp_ivs3 = builder.create <arith::MulIOp>(loc, iv0, dilHeight);
260
+ Value tmpIvs3 = iv0;
261
+ if (dilated1) {
262
+ tmpIvs3 =
263
+ builder.create <arith::MulIOp>(loc, iv0, dilHeight);
256
264
}
257
265
Value inputHeight =
258
- builder.create <arith::AddIOp>(loc, tmp_ivs1, tmp_ivs3 );
266
+ builder.create <arith::AddIOp>(loc, tmpIvs1, tmpIvs3 );
259
267
auto tmp1 = builder.create <affine::AffineForOp>(
260
268
loc, ValueRange{c0}, builder.getDimIdentityMap (),
261
269
ValueRange{width_k}, builder.getDimIdentityMap (),
262
270
/* Step=*/ 1 , ValueRange{itrArgs0[0 ]},
263
271
[&](OpBuilder &builder, Location loc, Value iv1,
264
272
ValueRange itrArgs1) {
265
273
// Create dilated[1] variables.
266
- Value tmp_ivs4 = iv1;
267
- if (dilated2){
268
- tmp_ivs4 = builder.create <arith::MulIOp>(loc, iv1, dilWidth);
274
+ Value tmpIvs4 = iv1;
275
+ if (dilated2) {
276
+ tmpIvs4 = builder.create <arith::MulIOp>(loc, iv1,
277
+ dilWidth);
269
278
}
270
- Value inputWidth =
271
- builder. create <arith::AddIOp>( loc, tmp_ivs2, tmp_ivs4 );
279
+ Value inputWidth = builder. create <arith::AddIOp>(
280
+ loc, tmpIvs2, tmpIvs4 );
272
281
Value inputVec = builder.create <MaskedLoadOp>(
273
282
loc, vectorTy, input,
274
283
ValueRange{ivs[0 ], inputHeight, inputWidth,
0 commit comments