From 860f9d09de4da5ee0e7adcb898cfe4496af2904f Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Thu, 16 Dec 2021 18:38:47 +0800
Subject: [PATCH 1/5] Experiment with MLIR return format

---
 benchmarks/DeepLearning/Ops/CMakeLists.txt    |   1 +
 .../PointwiseConv2DNhwcHwcfOp/CMakeLists.txt  |  38 +++++++
 .../MLIROptBenchmark.cpp                      | 102 ++++++++++++++++++
 .../Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp    |  33 ++++++
 .../PointwiseConv2DNhwcHwcf.mlir              |  44 ++++++++
 include/Utils/Container.h                     |  24 ++++-
 lib/Utils/Container.cpp                       |  57 +++++++++-
 7 files changed, 293 insertions(+), 6 deletions(-)
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir

diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt
index 9b283d61..8b8cc99a 100644
--- a/benchmarks/DeepLearning/Ops/CMakeLists.txt
+++ b/benchmarks/DeepLearning/Ops/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(DepthwiseConv2DNhwcHwcOp)
 add_subdirectory(Conv2DNhwcHwcfOp)
 add_subdirectory(Conv2DNchwFchwOp)
+add_subdirectory(PointwiseConv2DNhwcHwcfOp)
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
new file mode 100644
index 00000000..7e483980
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
@@ -0,0 +1,38 @@
+set(BUDDY_OPT_ATTR avx512f)
+set(LLVM_MLIR_BINARY_DIR ${BUDDY_OPT_BUILD_DIR}/../llvm/build/bin)
+
+add_custom_command(OUTPUT pointwise-conv-2d-nhwc-hwcf.o
+  COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_SOURCE_DIR}/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+    -linalg-bufferize
+    -std-bufferize
+    -tensor-constant-bufferize
+    -tensor-bufferize
+    -func-bufferize
+    -finalizing-bufferize
+    -buffer-deallocation
+    -convert-linalg-to-loops
+    -convert-scf-to-std
+    -convert-linalg-to-llvm
+    -lower-affine
+    --convert-memref-to-llvm
+    -convert-std-to-llvm='emit-c-wrappers=1'
+    -reconcile-unrealized-casts |
+  ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
+  ${LLVM_MLIR_BINARY_DIR}/llc
+    -mtriple=x86_64-unknown-linux-gnu
+    -mattr=${BUDDY_OPT_ATTR}
+    --filetype=obj
+    -o ${BUDDY_BINARY_DIR}/../benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/pointwise-conv-2d-nhwc-hwcf.o
+)
+
+add_library(PointwiseConv2DNhwcHwcf pointwise-conv-2d-nhwc-hwcf.o)
+
+set_target_properties(PointwiseConv2DNhwcHwcf PROPERTIES LINKER_LANGUAGE CXX)
+
+add_executable(pointwise-conv-2d-nhwc-hwcf-benchmark Main.cpp MLIROptBenchmark.cpp)
+
+target_link_libraries(pointwise-conv-2d-nhwc-hwcf-benchmark
+  GoogleBenchmark
+  PointwiseConv2DNhwcHwcf
+  Container
+)
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
new file mode 100644
index 00000000..47de7c68
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -0,0 +1,102 @@
+//===- MLIROptBenchmark.cpp ----------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the benchmark for the pointwise conv2d (nhwc-hwcf) operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils/Container.h"
+#include <benchmark/benchmark.h>
+
+// kNanosecond, kMicrosecond, kMillisecond, kSecond.
+#define UNIT benchmark::kNanosecond
+
+namespace {
+
+// Declare the MobileNet C interface.
+extern "C" void
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(MemRef<float, 4> *input,
+                                         MemRef<float, 4> *filter,
+                                         MemRef<float, 4> *output);
+extern "C" MemRef<float, 4>
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(MemRef<float, 4> *input,
+                                                     MemRef<float, 4> *filter);
+
+intptr_t sizesInput[4] = {1, 4, 5, 2};
+intptr_t sizesFilter[4] = {1, 1, 2, 7};
+intptr_t sizesOutput[4] = {1, 4, 5, 7};
+
+// Create input, filter, and output.
+MemRef<float, 4> inputMemRef(sizesInput, 2.0);
+MemRef<float, 4> filterMemRef(sizesFilter, 3.0);
+
+MemRef<float, 4> inputMemReturn(sizesInput, 2.0);
+MemRef<float, 4> filterMemReturn(sizesFilter, 3.0);
+
+MemRef<float, 4> outputMemRef(sizesOutput, 0.0);
+// Define benchmark function.
+void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemRef(sizesOutput, 0);
+      _mlir_ciface_pointwise_conv_2d_nhwc_hwcf(&inputMemRef, &filterMemRef,
+                                               &outputMemRef);
+    }
+  }
+}
+
+MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
+void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
+
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
+      outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+          &inputMemReturn, &filterMemReturn);
+    }
+  }
+}
+
+} // namespace
+
+// Register benchmarking function with different arguments.
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+
+// Print result function.
+void printResult() {
+  // Clear the output memref.
+  MemRef<float, 4> outputMemRef(sizesOutput, 0);
+  // Run the mlir function.
+  _mlir_ciface_pointwise_conv_2d_nhwc_hwcf(&inputMemRef, &filterMemRef,
+                                           &outputMemRef);
+
+  std::cout << "inputMemRef: " << inputMemRef << std::endl;
+  std::cout << "filterMemRef: " << filterMemRef << std::endl;
+
+  std::cout << "outputMemRef: " << outputMemRef << std::endl;
+  // Clear the output memref.
+  MemRef<float, 4> outputMemReturn2(sizesOutput, 0);
+  // Run the mlir function.
+  outputMemReturn2 = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+      &inputMemReturn, &filterMemReturn);
+
+  std::cout << "outputMemReturn: " << outputMemReturn2 << std::endl;
+}
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
new file mode 100644
index 00000000..cda5c37b
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
@@ -0,0 +1,33 @@
+//===- Main.cpp ----------------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the main file of the pointwise conv2d (nhwc-hwcf) benchmark.
+//
+//===----------------------------------------------------------------------===//
+
+#include <benchmark/benchmark.h>
+
+void printResult();
+
+int main(int argc, char **argv) {
+  // Run benchmarks.
+  ::benchmark::Initialize(&argc, argv);
+  ::benchmark::RunSpecifiedBenchmarks();
+  // Print result.
+  printResult();
+
+  return 0;
+}
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
new file mode 100644
index 00000000..00fab281
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -0,0 +1,44 @@
+// Generated from Mobilenet.mlir file
+// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
+// // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+// outs(%output : tensor<1x4x5x7xf32>)
+// return
+// }
+
+// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
+// outs(%output : tensor<?x?x?x?xf32>)
+// return
+// }
+//pointwise_conv_2d_nhwc_hwcf_with_return
+// func @pointwise_conv_2d_nhwc_hwcf_with_return(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+// %1 = linalg.conv_2d_nhwc_hwcf {
+// dilations = dense<1> : tensor<2xi64>,
+// strides = dense<1> : tensor<2xi64>
+// } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
+// return %1 : tensor<1x4x5x7xf32>
+// }
+
+func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+  %1 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
+  %2 = linalg.tensor_collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
+  %3 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
+  %4 = linalg.matmul ins(%1, %2 : tensor<20x2xf32>, tensor<2x7xf32>) outs(%3 : tensor<20x7xf32>) -> tensor<20x7xf32>
+  %5 = linalg.tensor_expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
+  return %5 : tensor<1x4x5x7xf32>
+  }
+
+// Generated from the IREE-processed MobileNet MLIR file.
+func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1x1x?x?xf32>, %output: memref<?x?x?x?xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : memref<?x?x?x?xf32>, memref<1x1x?x?xf32>)
+    outs(%output : memref<?x?x?x?xf32>)
+  return
+}
\ No newline at end of file
diff --git a/include/Utils/Container.h b/include/Utils/Container.h
index 14054b84..106c8fc7 100644
--- a/include/Utils/Container.h
+++ b/include/Utils/Container.h
@@ -21,6 +21,7 @@
 #ifndef UTILS_CONTAINER
 #define UTILS_CONTAINER
 
+#include <iostream>
 #include <memory>
 #include <numeric>
 #include <vector>
@@ -46,6 +47,19 @@ template <typename T, size_t N> class MemRef {
   // Constructor from a vector of png images.
   // Assume that all the images have the same shape.
   MemRef(const std::vector<PNGImage> &imgs, intptr_t sizes[N]);
+
+  // Copy assignment and copy constructor.
+  MemRef &operator=(const MemRef &rhs) = delete;
+  MemRef(const MemRef &) = delete;
+
+  // Move assignment and move constructor.
+  MemRef &operator=(MemRef &&rhs) noexcept;
+  MemRef(MemRef &&) noexcept;
+
+  // Output stream overloading.
+  template <typename U, size_t M>
+  friend std::ostream &operator<<(std::ostream &os, const MemRef<U, M> &memref);
+
   // Destructor.
   ~MemRef();
   // Permute the dimensions.
@@ -74,16 +88,16 @@ template <typename T, size_t N> class MemRef {
   size_t product(intptr_t sizes[N]) const;
 
   // Data.
-  T *allocated;
-  T *aligned;
+  T *allocated{nullptr};
+  T *aligned{nullptr};
   // Offset.
   intptr_t offset = 0;
   // Shape.
-  intptr_t sizes[N];
+  intptr_t sizes[N]{};
   // Strides.
-  intptr_t strides[N];
+  intptr_t strides[N]{};
   // Number of elements.
-  size_t size;
+  size_t size{};
 };
 
 #include "Utils/Container.cpp"
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 24b50366..5b96069d 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -56,8 +56,10 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   setStrides();
   size = product(sizes);
   T *data = new T[size];
+
   aligned = data;
   allocated = data;
+
   std::fill(data, data + size, init);
 }
 
@@ -134,7 +136,7 @@ MemRef<T, N>::MemRef(const PNGImage &img, intptr_t sizes[N]) {
   size_t height = img.height;
   size_t width = img.width;
   size = channels * height * width;
-  T *data = new T[size];
+  T *data = new T[size]{0};
   for (size_t h = 0; h < height; h++) {
     for (size_t w = 0; w < width; w++) {
       for (size_t c = 0; c < channels; c++) {
@@ -187,6 +189,59 @@ MemRef<T, N>::MemRef(const std::vector<PNGImage> &imgs, intptr_t sizes[N]) {
   allocated = data;
 }
 
+// Move assignment and move constructor.
+template <typename T, size_t N>
+MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
+    : size(std::move(other.size)), sizes{}, strides{},
+      allocated(other.allocated), aligned(other.allocated) {
+  std::swap(sizes, other.sizes);
+  std::swap(strides, other.strides);
+  other.allocated = other.aligned = nullptr;
+  std::cout << "move ctor" << std::endl;
+}
+
+template <typename T, size_t N>
+MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
+  if (this != &rhs) {
+    // intptr_t sizes[N]{};
+    // intptr_t strides[N]{};
+    // // method 1
+    // allocated = aligned = nullptr;
+    // std::swap(strides, rhs.strides);
+    // std::swap(sizes, rhs.sizes);
+    // aligned = allocated = std::move(rhs.allocated);
+    // rhs.allocated = rhs.aligned = nullptr;
+    // rhs.sizes
+
+    // method 2 handle by std::swap
+    std::swap(strides, rhs.strides);
+    std::swap(offset, rhs.offset);
+    std::swap(sizes, rhs.sizes);
+    std::swap(size, rhs.size);
+    std::swap(allocated, rhs.allocated);
+    std::swap(aligned, rhs.aligned);
+    rhs.allocated = rhs.aligned = nullptr;
+    std::fill(rhs.strides, rhs.strides + N, 0);
+    std::fill(rhs.sizes, rhs.sizes + N, 0);
+  }
+  // std::cout << "move assign ctor" << std::endl;
+  return *this;
+}
+
+template <typename T, size_t N>
+std::ostream &operator<<(std::ostream &os, const MemRef<T, N> &memref) {
+  os << "[ ";
+  size_t size =
+      std::accumulate(memref.sizes, memref.sizes + N, 1, std::multiplies<size_t>());
+  for (int i = 0; i < size; ++i)
+    os << memref.allocated[i] << " ";
+  os << "] of shape: [ ";
+  for (auto s : memref.sizes)
+    os << s << " ";
+  os << "]";
+  return os;
+}
+
 template <typename T, size_t N>
 MemRef<T, N>::~MemRef() {
   delete[] allocated;
 }
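A quick way to sanity-check the numbers printResult reports for this patch: a pointwise (1x1) convolution is just a per-pixel dot product over the input channels, so with the constant-filled operands used above (input 2.0, filter 3.0, two input channels) every element of the 1x4x5x7 output should come out as 12.0. The standalone snippet below is only an illustrative reference check, with the shapes copied from MLIROptBenchmark.cpp; it is not part of the patch.

#include <cstdio>

int main() {
  const int H = 4, W = 5, CI = 2, CO = 7;
  float out[H][W][CO] = {};
  // out[h][w][f] = sum over c of input[h][w][c] * filter[0][0][c][f].
  for (int h = 0; h < H; ++h)
    for (int w = 0; w < W; ++w)
      for (int f = 0; f < CO; ++f)
        for (int c = 0; c < CI; ++c)
          out[h][w][f] += 2.0f * 3.0f;
  std::printf("expected output element: %f\n", out[0][0][0]); // prints 12.000000
  return 0;
}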
From 770d4e76821bb664d64bf9bfdfd35d3ed572de00 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Thu, 16 Dec 2021 18:49:44 +0800
Subject: [PATCH 2/5] Modify comments

---
 .../MLIROptBenchmark.cpp |  1 -
 .../PointwiseConv2DNhwcHwcf.mlir | 46 +++++++++----------
 lib/Utils/Container.cpp | 4 ----
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index 47de7c68..d08317fb 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -60,7 +60,6 @@ void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
 
 MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
 void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
-
   for (auto _ : state) {
     for (int i = 0; i < state.range(0); ++i) {
       // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index 00fab281..c921bb46 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -1,28 +1,28 @@
 // Generated from Mobilenet.mlir file
-// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
-// // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-// outs(%output : tensor<1x4x5x7xf32>)
-// return
-// }
+func @pointwise_conv_2d_nhwc_hwcf_with_specified_shape(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
+  // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+    outs(%output : tensor<1x4x5x7xf32>)
+  return
+}
+
+func @pointwise_conv_2d_nhwc_hwcf_with_tensor(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
+    outs(%output : tensor<?x?x?x?xf32>)
+  return
+}
 
-// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
-// outs(%output : tensor<?x?x?x?xf32>)
-// return
-// }
-//pointwise_conv_2d_nhwc_hwcf_with_return
-// func @pointwise_conv_2d_nhwc_hwcf_with_return(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
-// %1 = linalg.conv_2d_nhwc_hwcf {
-// dilations = dense<1> : tensor<2xi64>,
-// strides = dense<1> : tensor<2xi64>
-// } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
-// return %1 : tensor<1x4x5x7xf32>
-// }
+func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %1 = linalg.conv_2d_nhwc_hwcf {
+    dilations = dense<1> : tensor<2xi64>,
+    strides = dense<1> : tensor<2xi64>
+  } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
+  return %1 : tensor<1x4x5x7xf32>
+}
 
 func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
   %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 5b96069d..be984299 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -56,10 +56,8 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   setStrides();
   size = product(sizes);
   T *data = new T[size];
-
   aligned = data;
   allocated = data;
-
   std::fill(data, data + size, init);
 }
 
@@ -203,8 +201,6 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
 template <typename T, size_t N>
 MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
   if (this != &rhs) {
-    // intptr_t sizes[N]{};
-    // intptr_t strides[N]{};
     // // method 1
     // allocated = aligned = nullptr;
     // std::swap(strides, rhs.strides);

From d610645cc7bcdc6809bd042baafa3eef8bc29ce9 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 14:50:43 +0800
Subject: [PATCH 3/5] Add original MLIR benchmark

---
 .../MLIROptBenchmark.cpp | 73 +++++++++++++------
 .../PointwiseConv2DNhwcHwcf.mlir | 47 ++++++------
 lib/Utils/Container.cpp | 4 +-
 3 files changed, 75 insertions(+), 49 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index d08317fb..8fc9cb6f 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -22,31 +22,34 @@
 #include <benchmark/benchmark.h>
 
 // kNanosecond, kMicrosecond, kMillisecond, kSecond.
-#define UNIT benchmark::kNanosecond
+#define UNIT benchmark::kMillisecond
 
 namespace {
 
 // Declare the MobileNet C interface.
 extern "C" void
-_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(MemRef<float, 4> *input,
-                                         MemRef<float, 4> *filter,
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(const MemRef<float, 4> *input,
+                                         const MemRef<float, 4> *filter,
                                          MemRef<float, 4> *output);
 extern "C" MemRef<float, 4>
-_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(MemRef<float, 4> *input,
-                                                     MemRef<float, 4> *filter);
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+    const MemRef<float, 4> *input, const MemRef<float, 4> *filter);
+extern "C" MemRef<float, 4>
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return_origin(
+    const MemRef<float, 4> *input, const MemRef<float, 4> *filter);
 
 intptr_t sizesInput[4] = {1, 4, 5, 2};
 intptr_t sizesFilter[4] = {1, 1, 2, 7};
 intptr_t sizesOutput[4] = {1, 4, 5, 7};
 
 // Create input, filter, and output.
-MemRef<float, 4> inputMemRef(sizesInput, 2.0);
-MemRef<float, 4> filterMemRef(sizesFilter, 3.0);
+MemRef<float, 4> inputMemRef(sizesInput, 2);
+MemRef<float, 4> filterMemRef(sizesFilter, 3);
 
-MemRef<float, 4> inputMemReturn(sizesInput, 2.0);
-MemRef<float, 4> filterMemReturn(sizesFilter, 3.0);
+MemRef<float, 4> inputMemReturn(sizesInput, 2);
+MemRef<float, 4> filterMemReturn(sizesFilter, 3);
 
-MemRef<float, 4> outputMemRef(sizesOutput, 0.0);
+MemRef<float, 4> outputMemRef(sizesOutput, 0);
 // Define benchmark function.
 void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
   for (auto _ : state) {
@@ -58,7 +61,7 @@ void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
   }
 }
 
-MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
+MemRef<float, 4> outputMemReturn(sizesOutput, 0);
 void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
   for (auto _ : state) {
     for (int i = 0; i < state.range(0); ++i) {
@@ -69,15 +72,19 @@ void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
   }
 }
 
-} // namespace
+MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
+void BM_PointwiseConv2DNhwcHwcfReturnOrigin(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
+      outputMemReturnOrigin =
+          _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return_origin(
+              &inputMemReturn, &filterMemReturn);
+    }
+  }
+}
 
-// Register benchmarking function with different arguments.
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+} // namespace
 
 // Print result function.
 void printResult() {
@@ -89,13 +96,33 @@ void printResult() {
 
   std::cout << "inputMemRef: " << inputMemRef << std::endl;
   std::cout << "filterMemRef: " << filterMemRef << std::endl;
-
   std::cout << "outputMemRef: " << outputMemRef << std::endl;
   // Clear the output memref.
-  MemRef<float, 4> outputMemReturn2(sizesOutput, 0);
+  MemRef<float, 4> outputMemReturn(sizesOutput, 0);
   // Run the mlir function.
-  outputMemReturn2 = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+  outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
       &inputMemReturn, &filterMemReturn);
+  std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
+  std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
+  std::cout << "outputMemReturn: " << outputMemReturn << std::endl;
 
-  std::cout << "outputMemReturn: " << outputMemReturn2 << std::endl;
+  // Clear the output memref.
+  MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
+  // Run the mlir function.
+  outputMemReturnOrigin = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+      &inputMemReturn, &filterMemReturn);
+  std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
+  std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
+  std::cout << "outputMemReturnOrigin: " << outputMemReturnOrigin << std::endl;
 }
+
+// Register benchmarking function with different arguments.
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(100)->Unit(UNIT);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index c921bb46..e8eb1372 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -1,22 +1,6 @@
 // Generated from Mobilenet.mlir file
-func @pointwise_conv_2d_nhwc_hwcf_with_specified_shape(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
-  // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-  linalg.conv_2d_nhwc_hwcf
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-    ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-    outs(%output : tensor<1x4x5x7xf32>)
-  return
-}
-
-func @pointwise_conv_2d_nhwc_hwcf_with_tensor(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
-  linalg.conv_2d_nhwc_hwcf
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-    ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
-    outs(%output : tensor<?x?x?x?xf32>)
-  return
-}
-
 func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
   %1 = linalg.conv_2d_nhwc_hwcf {
     dilations = dense<1> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -26,13 +10,13 @@ func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>
 
 func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
   %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-  %1 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
-  %2 = linalg.tensor_collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
-  %3 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
+  %1 = tensor.collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
+  %2 = tensor.collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
+  %3 = tensor.collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
   %4 = linalg.matmul ins(%1, %2 : tensor<20x2xf32>, tensor<2x7xf32>) outs(%3 : tensor<20x7xf32>) -> tensor<20x7xf32>
-  %5 = linalg.tensor_expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
+  %5 = tensor.expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
   return %5 : tensor<1x4x5x7xf32>
-  }
+}
 
 // Generated from the IREE-processed MobileNet MLIR file.
 func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1x1x?x?xf32>, %output: memref<?x?x?x?xf32>) {
@@ -41,4 +25,21 @@ func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1
     ins(%input, %filter : memref<?x?x?x?xf32>, memref<1x1x?x?xf32>)
     outs(%output : memref<?x?x?x?xf32>)
   return
-}
\ No newline at end of file
+}
+
+// test for specific shape
+// func @pointwise_conv_2d_nhwc_hwcf(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
+// outs(%output : memref<1x4x5x7xf32>)
+// return
+// }
+
+// func @pointwise_conv_2d_nhwc_hwcf(%output: tensor<1x4x5x7xf32>, %input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+// outs(%output : tensor<1x4x5x7xf32>)
+// return
+// }
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index be984299..d07994ff 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -195,7 +195,7 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
   std::swap(sizes, other.sizes);
   std::swap(strides, other.strides);
   other.allocated = other.aligned = nullptr;
-  std::cout << "move ctor" << std::endl;
+  // std::cout << "move ctor" << std::endl;
 }
 
 template <typename T, size_t N>
@@ -217,8 +217,6 @@ MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
     std::swap(allocated, rhs.allocated);
    std::swap(aligned, rhs.aligned);
     rhs.allocated = rhs.aligned = nullptr;
-    std::fill(rhs.strides, rhs.strides + N, 0);
-    std::fill(rhs.sizes, rhs.sizes + N, 0);
   }
   // std::cout << "move assign ctor" << std::endl;
   return *this;
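The with_return benchmarks in PATCH 3 lean on the move operations that PATCH 1 added to MemRef: the temporary returned by value from _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return is move-assigned into the long-lived outputMemReturn, which swaps the buffer pointers into the destination and then nulls them in the source, so the temporary's destructor does not free storage the benchmark still uses. A minimal usage sketch of that contract follows; it is illustrative only (assuming Utils/Container.h as patched above) and is not part of the patch series.

#include "Utils/Container.h"
#include <utility>

void moveContractSketch() {
  intptr_t sizes[4] = {1, 4, 5, 7};
  MemRef<float, 4> result(sizes, 0.0);
  {
    MemRef<float, 4> temp(sizes, 12.0); // stands in for the value returned by the C wrapper
    result = std::move(temp);           // result takes over temp's buffers; temp is nulled out
  }                                     // temp's destructor now deletes a null pointer: no double free
  // result keeps the 1x4x5x7 data that temp allocated.
}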
From 39d7e0cdf1e8ccd94868ed546c24a325782619ec Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 18:37:39 +0800
Subject: [PATCH 4/5] Review ready

---
 .../MLIROptBenchmark.cpp | 21 ++++++-------------
 .../PointwiseConv2DNhwcHwcf.mlir | 22 ++++++--------------
 lib/Utils/Container.cpp | 15 ++-------------
 3 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index 8fc9cb6f..d87fd27d 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -23,6 +23,7 @@
 
 // kNanosecond, kMicrosecond, kMillisecond, kSecond.
 #define UNIT benchmark::kMillisecond
+#define ITERATION 100
 
 namespace {
 
@@ -83,7 +84,10 @@ void BM_PointwiseConv2DNhwcHwcfReturnOrigin(benchmark::State &state) {
     }
   }
 }
-
+// Register benchmarking function with different arguments.
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(ITERATION)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(ITERATION)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(ITERATION)->Unit(UNIT);
 } // namespace
 
 // Print result function.
@@ -97,16 +101,16 @@ void printResult() {
   std::cout << "inputMemRef: " << inputMemRef << std::endl;
   std::cout << "filterMemRef: " << filterMemRef << std::endl;
   std::cout << "outputMemRef: " << outputMemRef << std::endl;
-  // Clear the output memref.
+
   MemRef<float, 4> outputMemReturn(sizesOutput, 0);
   // Run the mlir function.
   outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
       &inputMemReturn, &filterMemReturn);
+
   std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
   std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
   std::cout << "outputMemReturn: " << outputMemReturn << std::endl;
 
-  // Clear the output memref.
   MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
   // Run the mlir function.
   outputMemReturnOrigin = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
@@ -115,14 +119,3 @@ void printResult() {
   std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
   std::cout << "outputMemReturnOrigin: " << outputMemReturnOrigin << std::endl;
 }
-
-// Register benchmarking function with different arguments.
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(100)->Unit(UNIT);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index e8eb1372..5c173bf3 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -28,18 +28,10 @@ func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1
 }
 
 // test for specific shape
-// func @pointwise_conv_2d_nhwc_hwcf(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
-// outs(%output : memref<1x4x5x7xf32>)
-// return
-// }
-
-// func @pointwise_conv_2d_nhwc_hwcf(%output: tensor<1x4x5x7xf32>, %input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-// outs(%output : tensor<1x4x5x7xf32>)
-// return
-// }
+func @pointwise_conv_2d_nhwc_hwcf_spec(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
+    outs(%output : memref<1x4x5x7xf32>)
+  return
+}
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index d07994ff..4d0539b7 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -55,7 +55,7 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   }
   setStrides();
   size = product(sizes);
-  T *data = new T[size];
+  auto data = new T[size]{};
   aligned = data;
   allocated = data;
   std::fill(data, data + size, init);
@@ -134,7 +134,7 @@ MemRef<T, N>::MemRef(const PNGImage &img, intptr_t sizes[N]) {
   size_t height = img.height;
   size_t width = img.width;
   size = channels * height * width;
-  T *data = new T[size]{0};
+  T *data = new T[size];
   for (size_t h = 0; h < height; h++) {
     for (size_t w = 0; w < width; w++) {
       for (size_t c = 0; c < channels; c++) {
@@ -195,21 +195,11 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
   std::swap(sizes, other.sizes);
   std::swap(strides, other.strides);
   other.allocated = other.aligned = nullptr;
-  // std::cout << "move ctor" << std::endl;
 }
 
 template <typename T, size_t N>
 MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
   if (this != &rhs) {
-    // // method 1
-    // allocated = aligned = nullptr;
-    // std::swap(strides, rhs.strides);
-    // std::swap(sizes, rhs.sizes);
-    // aligned = allocated = std::move(rhs.allocated);
-    // rhs.allocated = rhs.aligned = nullptr;
-    // rhs.sizes
-
-    // method 2 handle by std::swap
     std::swap(strides, rhs.strides);
     std::swap(offset, rhs.offset);
     std::swap(sizes, rhs.sizes);
@@ -218,7 +208,6 @@ MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
     std::swap(aligned, rhs.aligned);
     rhs.allocated = rhs.aligned = nullptr;
   }
-  // std::cout << "move assign ctor" << std::endl;
   return *this;
 }

From 18e39c6f41282879fdd61eecfe8a8341f3eea637 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 18:45:40 +0800
Subject: [PATCH 5/5] Push for review

---
 lib/Utils/Container.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 4d0539b7..685de22b 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -55,7 +55,7 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   }
   setStrides();
   size = product(sizes);
-  auto data = new T[size]{};
+  T *data = new T[size]{};
   aligned = data;
   allocated = data;
   std::fill(data, data + size, init);
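One piece of background that ties the series together: with the -convert-std-to-llvm='emit-c-wrappers=1' option used in the CMake pipeline, every _mlir_ciface_* entry point exchanges memrefs as plain descriptor structs, and the _with_return variants hand such a descriptor back by value. MemRef<T, N> can serve as the C++ view of that descriptor only because its members are declared in the same order. The struct below is an illustrative sketch of the rank-4 f32 descriptor (the struct and its name are ours, not code from this patch series), shown to make the correspondence with Container.h explicit.

#include <cstdint>

// Layout the MLIR C wrappers expect for a rank-4 f32 memref; MemRef<float, 4>
// mirrors it member for member: allocated, aligned, offset, sizes, strides.
struct MemRefDescriptor4DF32 {
  float *allocated;    // pointer returned by the allocation
  float *aligned;      // pointer the generated code actually indexes
  intptr_t offset;     // offset (in elements) of the first element
  intptr_t sizes[4];   // dimension sizes, e.g. {1, 4, 5, 7} for the output
  intptr_t strides[4]; // row-major strides, e.g. {140, 35, 7, 1}
};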