From 860f9d09de4da5ee0e7adcb898cfe4496af2904f Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Thu, 16 Dec 2021 18:38:47 +0800
Subject: [PATCH 1/5] Experiment with MLIR return format

---
 benchmarks/DeepLearning/Ops/CMakeLists.txt    |   1 +
 .../PointwiseConv2DNhwcHwcfOp/CMakeLists.txt  |  38 +++++++
 .../MLIROptBenchmark.cpp                      | 102 ++++++++++++++++++
 .../Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp    |  33 ++++++
 .../PointwiseConv2DNhwcHwcf.mlir              |  44 ++++++++
 include/Utils/Container.h                     |  24 ++++-
 lib/Utils/Container.cpp                       |  57 +++++++++-
 7 files changed, 293 insertions(+), 6 deletions(-)
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
 create mode 100644 benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir

diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt
index 9b283d61..8b8cc99a 100644
--- a/benchmarks/DeepLearning/Ops/CMakeLists.txt
+++ b/benchmarks/DeepLearning/Ops/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(DepthwiseConv2DNhwcHwcOp)
 add_subdirectory(Conv2DNhwcHwcfOp)
 add_subdirectory(Conv2DNchwFchwOp)
+add_subdirectory(PointwiseConv2DNhwcHwcfOp)
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
new file mode 100644
index 00000000..7e483980
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/CMakeLists.txt
@@ -0,0 +1,38 @@
+set(BUDDY_OPT_ATTR avx512f)
+set(LLVM_MLIR_BINARY_DIR ${BUDDY_OPT_BUILD_DIR}/../llvm/build/bin)
+
+add_custom_command(OUTPUT pointwise-conv-2d-nhwc-hwcf.o
+  COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_SOURCE_DIR}/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+    -linalg-bufferize
+    -std-bufferize
+    -tensor-constant-bufferize
+    -tensor-bufferize
+    -func-bufferize
+    -finalizing-bufferize
+    -buffer-deallocation
+    -convert-linalg-to-loops
+    -convert-scf-to-std
+    -convert-linalg-to-llvm
+    -lower-affine
+    --convert-memref-to-llvm
+    -convert-std-to-llvm='emit-c-wrappers=1'
+    -reconcile-unrealized-casts |
+  ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
+  ${LLVM_MLIR_BINARY_DIR}/llc
+    -mtriple=x86_64-unknown-linux-gnu
+    -mattr=${BUDDY_OPT_ATTR}
+    --filetype=obj
+    -o ${BUDDY_BINARY_DIR}/../benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/pointwise-conv-2d-nhwc-hwcf.o
+)
+
+add_library(PointwiseConv2DNhwcHwcf pointwise-conv-2d-nhwc-hwcf.o)
+
+set_target_properties(PointwiseConv2DNhwcHwcf PROPERTIES LINKER_LANGUAGE CXX)
+
+add_executable(pointwise-conv-2d-nhwc-hwcf-benchmark Main.cpp MLIROptBenchmark.cpp)
+
+target_link_libraries(pointwise-conv-2d-nhwc-hwcf-benchmark
+  GoogleBenchmark
+  PointwiseConv2DNhwcHwcf
+  Container
+)
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
new file mode 100644
index 00000000..47de7c68
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -0,0 +1,102 @@
+//===- MLIROptBenchmark.cpp ----------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the benchmark for the pointwise conv2d (nhwc-hwcf) operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils/Container.h"
+#include <benchmark/benchmark.h>
+
+// kNanosecond, kMicrosecond, kMillisecond, kSecond.
+#define UNIT benchmark::kNanosecond
+
+namespace {
+
+// Declare the MobileNet C interface.
+extern "C" void
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(MemRef<float, 4> *input,
+                                         MemRef<float, 4> *filter,
+                                         MemRef<float, 4> *output);
+extern "C" MemRef<float, 4>
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(MemRef<float, 4> *input,
+                                                     MemRef<float, 4> *filter);
+
+intptr_t sizesInput[4] = {1, 4, 5, 2};
+intptr_t sizesFilter[4] = {1, 1, 2, 7};
+intptr_t sizesOutput[4] = {1, 4, 5, 7};
+
+// Create input, filter, and output.
+MemRef<float, 4> inputMemRef(sizesInput, 2.0);
+MemRef<float, 4> filterMemRef(sizesFilter, 3.0);
+
+MemRef<float, 4> inputMemReturn(sizesInput, 2.0);
+MemRef<float, 4> filterMemReturn(sizesFilter, 3.0);
+
+MemRef<float, 4> outputMemRef(sizesOutput, 0.0);
+// Define benchmark function.
+void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemRef(sizesOutput, 0);
+      _mlir_ciface_pointwise_conv_2d_nhwc_hwcf(&inputMemRef, &filterMemRef,
+                                               &outputMemRef);
+    }
+  }
+}
+
+MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
+void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
+
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
+      outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+          &inputMemReturn, &filterMemReturn);
+    }
+  }
+}
+
+} // namespace
+
+// Register benchmarking function with different arguments.
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+
+// Print result function.
+void printResult() {
+  // Clear the output memref.
+  MemRef<float, 4> outputMemRef(sizesOutput, 0);
+  // Run the mlir function.
+  _mlir_ciface_pointwise_conv_2d_nhwc_hwcf(&inputMemRef, &filterMemRef,
+                                           &outputMemRef);
+
+  std::cout << "inputMemRef: " << inputMemRef << std::endl;
+  std::cout << "filterMemRef: " << filterMemRef << std::endl;
+
+  std::cout << "outputMemRef: " << outputMemRef << std::endl;
+  // Clear the output memref.
+  MemRef<float, 4> outputMemReturn2(sizesOutput, 0);
+  // Run the mlir function.
+  outputMemReturn2 = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+      &inputMemReturn, &filterMemReturn);
+
+  std::cout << "outputMemReturn: " << outputMemReturn2 << std::endl;
+}
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
new file mode 100644
index 00000000..cda5c37b
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/Main.cpp
@@ -0,0 +1,33 @@
+//===- Main.cpp ----------------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the main file of the pointwise conv2d (nhwc-hwcf) benchmark.
+//
+//===----------------------------------------------------------------------===//
+
+#include <benchmark/benchmark.h>
+
+void printResult();
+
+int main(int argc, char **argv) {
+  // Run benchmarks.
+  ::benchmark::Initialize(&argc, argv);
+  ::benchmark::RunSpecifiedBenchmarks();
+  // Print result.
+  printResult();
+
+  return 0;
+}
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
new file mode 100644
index 00000000..00fab281
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -0,0 +1,44 @@
+// Generated from Mobilenet.mlir file
+// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
+// // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+// outs(%output : tensor<1x4x5x7xf32>)
+// return
+// }
+
+// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
+// outs(%output : tensor<?x?x?x?xf32>)
+// return
+// }
+//pointwise_conv_2d_nhwc_hwcf_with_return
+// func @pointwise_conv_2d_nhwc_hwcf_with_return(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+// %1 = linalg.conv_2d_nhwc_hwcf {
+// dilations = dense<1> : tensor<2xi64>,
+// strides = dense<1> : tensor<2xi64>
+// } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
+// return %1 : tensor<1x4x5x7xf32>
+// }
+
+func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+  %1 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
+  %2 = linalg.tensor_collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
+  %3 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
+  %4 = linalg.matmul ins(%1, %2 : tensor<20x2xf32>, tensor<2x7xf32>) outs(%3 : tensor<20x7xf32>) -> tensor<20x7xf32>
+  %5 = linalg.tensor_expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
+  return %5 : tensor<1x4x5x7xf32>
+  }
+
+// Generated from the IREE-processed MobileNet MLIR file.
+func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1x1x?x?xf32>, %output: memref<?x?x?x?xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : memref<?x?x?x?xf32>, memref<1x1x?x?xf32>)
+    outs(%output : memref<?x?x?x?xf32>)
+  return
+}
\ No newline at end of file
diff --git a/include/Utils/Container.h b/include/Utils/Container.h
index 14054b84..106c8fc7 100644
--- a/include/Utils/Container.h
+++ b/include/Utils/Container.h
@@ -21,6 +21,7 @@
 #ifndef UTILS_CONTAINER
 #define UTILS_CONTAINER
 
+#include <iostream>
 #include <memory>
 #include <numeric>
 #include <vector>
@@ -46,6 +47,19 @@ template <typename T, size_t N> class MemRef {
   // Constructor from a vector of png images.
   // Assume that all the images have the same shape.
   MemRef(const std::vector<PNGImage> &imgs, intptr_t sizes[N]);
+
+  // Copy assignment and copy constructor.
+  MemRef &operator=(const MemRef &rhs) = delete;
+  MemRef(const MemRef &) = delete;
+
+  // Move assignment and move constructor.
+  MemRef &operator=(MemRef &&rhs) noexcept;
+  MemRef(MemRef &&) noexcept;
+
+  // Output stream overloading.
+  template <typename U, size_t M>
+  friend std::ostream &operator<<(std::ostream &os, const MemRef<U, M> &memref);
+
   // Destructor.
   ~MemRef();
   // Permute the dimensions.
@@ -74,16 +88,16 @@ template <typename T, size_t N> class MemRef {
   size_t product(intptr_t sizes[N]) const;
 
   // Data.
-  T *allocated;
-  T *aligned;
+  T *allocated{nullptr};
+  T *aligned{nullptr};
   // Offset.
   intptr_t offset = 0;
   // Shape.
-  intptr_t sizes[N];
+  intptr_t sizes[N]{};
   // Strides.
-  intptr_t strides[N];
+  intptr_t strides[N]{};
   // Number of elements.
-  size_t size;
+  size_t size{};
 };
 
 #include "Utils/Container.cpp"
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 24b50366..5b96069d 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -56,8 +56,10 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   setStrides();
   size = product(sizes);
   T *data = new T[size];
+
   aligned = data;
   allocated = data;
+
   std::fill(data, data + size, init);
 }
 
@@ -134,7 +136,7 @@ MemRef<T, N>::MemRef(const PNGImage &img, intptr_t sizes[N]) {
   size_t height = img.height;
   size_t width = img.width;
   size = channels * height * width;
-  T *data = new T[size];
+  T *data = new T[size]{0};
   for (size_t h = 0; h < height; h++) {
     for (size_t w = 0; w < width; w++) {
       for (size_t c = 0; c < channels; c++) {
@@ -187,6 +189,59 @@ MemRef<T, N>::MemRef(const std::vector<PNGImage> &imgs, intptr_t sizes[N]) {
   allocated = data;
 }
 
+// Move assignment and move constructor.
+template <typename T, size_t N>
+MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
+    : size(std::move(other.size)), sizes{}, strides{},
+      allocated(other.allocated), aligned(other.allocated) {
+  std::swap(sizes, other.sizes);
+  std::swap(strides, other.strides);
+  other.allocated = other.aligned = nullptr;
+  std::cout << "move ctor" << std::endl;
+}
+
+template <typename T, size_t N>
+MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
+  if (this != &rhs) {
+    // intptr_t sizes[N]{};
+    // intptr_t strides[N]{};
+    // // method 1
+    // allocated = aligned = nullptr;
+    // std::swap(strides, rhs.strides);
+    // std::swap(sizes, rhs.sizes);
+    // aligned = allocated = std::move(rhs.allocated);
+    // rhs.allocated = rhs.aligned = nullptr;
+    // rhs.sizes
+
+    // method 2 handle by std::swap
+    std::swap(strides, rhs.strides);
+    std::swap(offset, rhs.offset);
+    std::swap(sizes, rhs.sizes);
+    std::swap(size, rhs.size);
+    std::swap(allocated, rhs.allocated);
+    std::swap(aligned, rhs.aligned);
+    rhs.allocated = rhs.aligned = nullptr;
+    std::fill(rhs.strides, rhs.strides + N, 0);
+    std::fill(rhs.sizes, rhs.sizes + N, 0);
+  }
+  // std::cout << "move assign ctor" << std::endl;
+  return *this;
+}
+
+template <typename T, size_t N>
+std::ostream &operator<<(std::ostream &os, const MemRef<T, N> &memref) {
+  os << "[ ";
+  size_t size =
+      std::accumulate(memref.sizes, memref.sizes + N, 1, std::multiplies<size_t>());
+  for (int i = 0; i < size; ++i)
+    os << memref.allocated[i] << " ";
+  os << "] of shape: [ ";
+  for (auto s : memref.sizes)
+    os << s << " ";
+  os << "]";
+  return os;
+}
+
 template <typename T, size_t N>
 MemRef<T, N>::~MemRef() {
   delete[] allocated;
 }
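A quick way to sanity-check the numbers printResult reports for this patch: a pointwise (1x1) convolution is just a per-pixel dot product over the input channels, so with the constant-filled operands used above (input 2.0, filter 3.0, two input channels) every element of the 1x4x5x7 output should come out as 12.0. The standalone snippet below is only an illustrative reference check, with the shapes copied from MLIROptBenchmark.cpp; it is not part of the patch.

#include <cstdio>

int main() {
  const int H = 4, W = 5, CI = 2, CO = 7;
  float out[H][W][CO] = {};
  // out[h][w][f] = sum over c of input[h][w][c] * filter[0][0][c][f].
  for (int h = 0; h < H; ++h)
    for (int w = 0; w < W; ++w)
      for (int f = 0; f < CO; ++f)
        for (int c = 0; c < CI; ++c)
          out[h][w][f] += 2.0f * 3.0f;
  std::printf("expected output element: %f\n", out[0][0][0]); // prints 12.000000
  return 0;
}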
From 770d4e76821bb664d64bf9bfdfd35d3ed572de00 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Thu, 16 Dec 2021 18:49:44 +0800
Subject: [PATCH 2/5] Modify comments

---
 .../MLIROptBenchmark.cpp |  1 -
 .../PointwiseConv2DNhwcHwcf.mlir | 46 +++++++++----------
 lib/Utils/Container.cpp | 4 ----
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index 47de7c68..d08317fb 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -60,7 +60,6 @@ void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
 
 MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
 void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
-
   for (auto _ : state) {
     for (int i = 0; i < state.range(0); ++i) {
       // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index 00fab281..c921bb46 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -1,28 +1,28 @@
 // Generated from Mobilenet.mlir file
-// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
-// // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-// outs(%output : tensor<1x4x5x7xf32>)
-// return
-// }
+func @pointwise_conv_2d_nhwc_hwcf_with_specified_shape(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
+  // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+    outs(%output : tensor<1x4x5x7xf32>)
+  return
+}
+
+func @pointwise_conv_2d_nhwc_hwcf_with_tensor(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
+    outs(%output : tensor<?x?x?x?xf32>)
+  return
+}
 
-// func @pointwise_conv_2d_nhwc_hwcf(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
-// outs(%output : tensor<?x?x?x?xf32>)
-// return
-// }
-//pointwise_conv_2d_nhwc_hwcf_with_return
-// func @pointwise_conv_2d_nhwc_hwcf_with_return(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
-// %1 = linalg.conv_2d_nhwc_hwcf {
-// dilations = dense<1> : tensor<2xi64>,
-// strides = dense<1> : tensor<2xi64>
-// } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
-// return %1 : tensor<1x4x5x7xf32>
-// }
+func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %1 = linalg.conv_2d_nhwc_hwcf {
+    dilations = dense<1> : tensor<2xi64>,
+    strides = dense<1> : tensor<2xi64>
+  } ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
+  return %1 : tensor<1x4x5x7xf32>
+}
 
 func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
   %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 5b96069d..be984299 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -56,10 +56,8 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   setStrides();
   size = product(sizes);
   T *data = new T[size];
-
   aligned = data;
   allocated = data;
-
   std::fill(data, data + size, init);
 }
 
@@ -203,8 +201,6 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
 template <typename T, size_t N>
 MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
   if (this != &rhs) {
-    // intptr_t sizes[N]{};
-    // intptr_t strides[N]{};
     // // method 1
     // allocated = aligned = nullptr;
     // std::swap(strides, rhs.strides);

From d610645cc7bcdc6809bd042baafa3eef8bc29ce9 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 14:50:43 +0800
Subject: [PATCH 3/5] Add original MLIR benchmark

---
 .../MLIROptBenchmark.cpp | 73 +++++++++++++------
 .../PointwiseConv2DNhwcHwcf.mlir | 47 ++++++------
 lib/Utils/Container.cpp | 4 +-
 3 files changed, 75 insertions(+), 49 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index d08317fb..8fc9cb6f 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -22,31 +22,34 @@
 #include <benchmark/benchmark.h>
 
 // kNanosecond, kMicrosecond, kMillisecond, kSecond.
-#define UNIT benchmark::kNanosecond
+#define UNIT benchmark::kMillisecond
 
 namespace {
 
 // Declare the MobileNet C interface.
 extern "C" void
-_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(MemRef<float, 4> *input,
-                                         MemRef<float, 4> *filter,
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf(const MemRef<float, 4> *input,
+                                         const MemRef<float, 4> *filter,
                                          MemRef<float, 4> *output);
 extern "C" MemRef<float, 4>
-_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(MemRef<float, 4> *input,
-                                                     MemRef<float, 4> *filter);
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+    const MemRef<float, 4> *input, const MemRef<float, 4> *filter);
+extern "C" MemRef<float, 4>
+_mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return_origin(
+    const MemRef<float, 4> *input, const MemRef<float, 4> *filter);
 
 intptr_t sizesInput[4] = {1, 4, 5, 2};
 intptr_t sizesFilter[4] = {1, 1, 2, 7};
 intptr_t sizesOutput[4] = {1, 4, 5, 7};
 
 // Create input, filter, and output.
-MemRef<float, 4> inputMemRef(sizesInput, 2.0);
-MemRef<float, 4> filterMemRef(sizesFilter, 3.0);
+MemRef<float, 4> inputMemRef(sizesInput, 2);
+MemRef<float, 4> filterMemRef(sizesFilter, 3);
 
-MemRef<float, 4> inputMemReturn(sizesInput, 2.0);
-MemRef<float, 4> filterMemReturn(sizesFilter, 3.0);
+MemRef<float, 4> inputMemReturn(sizesInput, 2);
+MemRef<float, 4> filterMemReturn(sizesFilter, 3);
 
-MemRef<float, 4> outputMemRef(sizesOutput, 0.0);
+MemRef<float, 4> outputMemRef(sizesOutput, 0);
 // Define benchmark function.
 void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
   for (auto _ : state) {
@@ -58,7 +61,7 @@ void BM_PointwiseConv2DNhwcHwcf(benchmark::State &state) {
   }
 }
 
-MemRef<float, 4> outputMemReturn(sizesOutput, 0.0);
+MemRef<float, 4> outputMemReturn(sizesOutput, 0);
 void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
   for (auto _ : state) {
     for (int i = 0; i < state.range(0); ++i) {
@@ -69,15 +72,19 @@ void BM_PointwiseConv2DNhwcHwcfReturn(benchmark::State &state) {
   }
 }
 
-} // namespace
+MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
+void BM_PointwiseConv2DNhwcHwcfReturnOrigin(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      // MemRef<float, 4> outputMemReturn(sizesOutput, 0);
+      outputMemReturnOrigin =
+          _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return_origin(
+              &inputMemReturn, &filterMemReturn);
+    }
+  }
+}
 
-// Register benchmarking function with different arguments.
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+} // namespace
 
 // Print result function.
 void printResult() {
@@ -89,13 +96,33 @@ void printResult() {
 
   std::cout << "inputMemRef: " << inputMemRef << std::endl;
   std::cout << "filterMemRef: " << filterMemRef << std::endl;
-
   std::cout << "outputMemRef: " << outputMemRef << std::endl;
   // Clear the output memref.
-  MemRef<float, 4> outputMemReturn2(sizesOutput, 0);
+  MemRef<float, 4> outputMemReturn(sizesOutput, 0);
   // Run the mlir function.
-  outputMemReturn2 = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+  outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
       &inputMemReturn, &filterMemReturn);
+  std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
+  std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
+  std::cout << "outputMemReturn: " << outputMemReturn << std::endl;
 
-  std::cout << "outputMemReturn: " << outputMemReturn2 << std::endl;
+  // Clear the output memref.
+  MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
+  // Run the mlir function.
+  outputMemReturnOrigin = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
+      &inputMemReturn, &filterMemReturn);
+  std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
+  std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
+  std::cout << "outputMemReturnOrigin: " << outputMemReturnOrigin << std::endl;
 }
+
+// Register benchmarking function with different arguments.
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(1)->Unit(UNIT);
+// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(10)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(100)->Unit(UNIT);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index c921bb46..e8eb1372 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -1,22 +1,6 @@
 // Generated from Mobilenet.mlir file
-func @pointwise_conv_2d_nhwc_hwcf_with_specified_shape(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>, %output: tensor<1x4x5x7xf32>) {
-  // %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-  linalg.conv_2d_nhwc_hwcf
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-    ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-    outs(%output : tensor<1x4x5x7xf32>)
-  return
-}
-
-func @pointwise_conv_2d_nhwc_hwcf_with_tensor(%input: tensor<?x?x?x?xf32>, %filter: tensor<1x1x?x?xf32>, %output: tensor<?x?x?x?xf32>) {
-  linalg.conv_2d_nhwc_hwcf
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-    ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<1x1x?x?xf32>)
-    outs(%output : tensor<?x?x?x?xf32>)
-  return
-}
-
 func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+  %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
   %1 = linalg.conv_2d_nhwc_hwcf {
     dilations = dense<1> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -26,13 +10,13 @@ func @pointwise_conv_2d_nhwc_hwcf_with_return_origin(%input: tensor<1x4x5x2xf32>
 
 func @pointwise_conv_2d_nhwc_hwcf_with_return(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
   %0 = linalg.init_tensor [1, 4, 5, 7] : tensor<1x4x5x7xf32>
-  %1 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
-  %2 = linalg.tensor_collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
-  %3 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
+  %1 = tensor.collapse_shape %arg0 [[0, 1, 2], [3]] : tensor<1x4x5x2xf32> into tensor<20x2xf32>
+  %2 = tensor.collapse_shape %arg1 [[0, 1, 2], [3]] : tensor<1x1x2x7xf32> into tensor<2x7xf32>
+  %3 = tensor.collapse_shape %0 [[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
   %4 = linalg.matmul ins(%1, %2 : tensor<20x2xf32>, tensor<2x7xf32>) outs(%3 : tensor<20x7xf32>) -> tensor<20x7xf32>
-  %5 = linalg.tensor_expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
+  %5 = tensor.expand_shape %4 [[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
   return %5 : tensor<1x4x5x7xf32>
-  }
+}
 
 // Generated from the IREE-processed MobileNet MLIR file.
 func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1x1x?x?xf32>, %output: memref<?x?x?x?xf32>) {
@@ -41,4 +25,21 @@ func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1
     ins(%input, %filter : memref<?x?x?x?xf32>, memref<1x1x?x?xf32>)
     outs(%output : memref<?x?x?x?xf32>)
   return
-}
\ No newline at end of file
+}
+
+// test for specific shape
+// func @pointwise_conv_2d_nhwc_hwcf(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
+// outs(%output : memref<1x4x5x7xf32>)
+// return
+// }
+
+// func @pointwise_conv_2d_nhwc_hwcf(%output: tensor<1x4x5x7xf32>, %input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) {
+// linalg.conv_2d_nhwc_hwcf
+// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
+// outs(%output : tensor<1x4x5x7xf32>)
+// return
+// }
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index be984299..d07994ff 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -195,7 +195,7 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
   std::swap(sizes, other.sizes);
   std::swap(strides, other.strides);
   other.allocated = other.aligned = nullptr;
-  std::cout << "move ctor" << std::endl;
+  // std::cout << "move ctor" << std::endl;
 }
 
 template <typename T, size_t N>
@@ -217,8 +217,6 @@ MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
     std::swap(allocated, rhs.allocated);
    std::swap(aligned, rhs.aligned);
     rhs.allocated = rhs.aligned = nullptr;
-    std::fill(rhs.strides, rhs.strides + N, 0);
-    std::fill(rhs.sizes, rhs.sizes + N, 0);
   }
   // std::cout << "move assign ctor" << std::endl;
   return *this;
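The with_return benchmarks in PATCH 3 lean on the move operations that PATCH 1 added to MemRef: the temporary returned by value from _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return is move-assigned into the long-lived outputMemReturn, which swaps the buffer pointers into the destination and then nulls them in the source, so the temporary's destructor does not free storage the benchmark still uses. A minimal usage sketch of that contract follows; it is illustrative only (assuming Utils/Container.h as patched above) and is not part of the patch series.

#include "Utils/Container.h"
#include <utility>

void moveContractSketch() {
  intptr_t sizes[4] = {1, 4, 5, 7};
  MemRef<float, 4> result(sizes, 0.0);
  {
    MemRef<float, 4> temp(sizes, 12.0); // stands in for the value returned by the C wrapper
    result = std::move(temp);           // result takes over temp's buffers; temp is nulled out
  }                                     // temp's destructor now deletes a null pointer: no double free
  // result keeps the 1x4x5x7 data that temp allocated.
}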
From 39d7e0cdf1e8ccd94868ed546c24a325782619ec Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 18:37:39 +0800
Subject: [PATCH 4/5] Review ready

---
 .../MLIROptBenchmark.cpp | 21 ++++++-------------
 .../PointwiseConv2DNhwcHwcf.mlir | 22 ++++++--------------
 lib/Utils/Container.cpp | 15 ++-------------
 3 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
index 8fc9cb6f..d87fd27d 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/MLIROptBenchmark.cpp
@@ -23,6 +23,7 @@
 
 // kNanosecond, kMicrosecond, kMillisecond, kSecond.
 #define UNIT benchmark::kMillisecond
+#define ITERATION 100
 
 namespace {
 
@@ -83,7 +84,10 @@ void BM_PointwiseConv2DNhwcHwcfReturnOrigin(benchmark::State &state) {
     }
   }
 }
-
+// Register benchmarking function with different arguments.
+BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(ITERATION)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(ITERATION)->Unit(UNIT);
+BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(ITERATION)->Unit(UNIT);
 } // namespace
 
 // Print result function.
@@ -97,16 +101,16 @@ void printResult() {
   std::cout << "inputMemRef: " << inputMemRef << std::endl;
   std::cout << "filterMemRef: " << filterMemRef << std::endl;
   std::cout << "outputMemRef: " << outputMemRef << std::endl;
-  // Clear the output memref.
+
   MemRef<float, 4> outputMemReturn(sizesOutput, 0);
   // Run the mlir function.
   outputMemReturn = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
       &inputMemReturn, &filterMemReturn);
+
   std::cout << "inputMemReturn: " << inputMemReturn << std::endl;
   std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
   std::cout << "outputMemReturn: " << outputMemReturn << std::endl;
 
-  // Clear the output memref.
   MemRef<float, 4> outputMemReturnOrigin(sizesOutput, 0);
   // Run the mlir function.
   outputMemReturnOrigin = _mlir_ciface_pointwise_conv_2d_nhwc_hwcf_with_return(
@@ -115,14 +119,3 @@ void printResult() {
   std::cout << "filterMemReturn: " << filterMemReturn << std::endl;
   std::cout << "outputMemReturnOrigin: " << outputMemReturnOrigin << std::endl;
 }
-
-// Register benchmarking function with different arguments.
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcf)->Arg(100)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturn)->Arg(100)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(1)->Unit(UNIT);
-// BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(10)->Unit(UNIT);
-BENCHMARK(BM_PointwiseConv2DNhwcHwcfReturnOrigin)->Arg(100)->Unit(UNIT);
diff --git a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
index e8eb1372..5c173bf3 100644
--- a/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
+++ b/benchmarks/DeepLearning/Ops/PointwiseConv2DNhwcHwcfOp/PointwiseConv2DNhwcHwcf.mlir
@@ -28,18 +28,10 @@ func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<1
 }
 
 // test for specific shape
-// func @pointwise_conv_2d_nhwc_hwcf(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
-// outs(%output : memref<1x4x5x7xf32>)
-// return
-// }
-
-// func @pointwise_conv_2d_nhwc_hwcf(%output: tensor<1x4x5x7xf32>, %input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) {
-// linalg.conv_2d_nhwc_hwcf
-// {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-// ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>)
-// outs(%output : tensor<1x4x5x7xf32>)
-// return
-// }
+func @pointwise_conv_2d_nhwc_hwcf_spec(%input: memref<1x4x5x2xf32>, %filter: memref<1x1x2x7xf32>, %output: memref<1x4x5x7xf32>) {
+  linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%input, %filter : memref<1x4x5x2xf32>, memref<1x1x2x7xf32>)
+    outs(%output : memref<1x4x5x7xf32>)
+  return
+}
diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index d07994ff..4d0539b7 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -55,7 +55,7 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   }
   setStrides();
   size = product(sizes);
-  T *data = new T[size];
+  auto data = new T[size]{};
   aligned = data;
   allocated = data;
   std::fill(data, data + size, init);
@@ -134,7 +134,7 @@ MemRef<T, N>::MemRef(const PNGImage &img, intptr_t sizes[N]) {
   size_t height = img.height;
   size_t width = img.width;
   size = channels * height * width;
-  T *data = new T[size]{0};
+  T *data = new T[size];
   for (size_t h = 0; h < height; h++) {
     for (size_t w = 0; w < width; w++) {
       for (size_t c = 0; c < channels; c++) {
@@ -195,21 +195,11 @@ MemRef<T, N>::MemRef(MemRef<T, N> &&other) noexcept
   std::swap(sizes, other.sizes);
   std::swap(strides, other.strides);
   other.allocated = other.aligned = nullptr;
-  // std::cout << "move ctor" << std::endl;
 }
 
 template <typename T, size_t N>
 MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
   if (this != &rhs) {
-    // // method 1
-    // allocated = aligned = nullptr;
-    // std::swap(strides, rhs.strides);
-    // std::swap(sizes, rhs.sizes);
-    // aligned = allocated = std::move(rhs.allocated);
-    // rhs.allocated = rhs.aligned = nullptr;
-    // rhs.sizes
-
-    // method 2 handle by std::swap
     std::swap(strides, rhs.strides);
     std::swap(offset, rhs.offset);
     std::swap(sizes, rhs.sizes);
@@ -218,7 +208,6 @@ MemRef<T, N> &MemRef<T, N>::operator=(MemRef<T, N> &&rhs) noexcept {
     std::swap(aligned, rhs.aligned);
     rhs.allocated = rhs.aligned = nullptr;
   }
-  // std::cout << "move assign ctor" << std::endl;
   return *this;
 }

From 18e39c6f41282879fdd61eecfe8a8341f3eea637 Mon Sep 17 00:00:00 2001
From: Joejiong <1004691415@qq.com>
Date: Mon, 20 Dec 2021 18:45:40 +0800
Subject: [PATCH 5/5] Push for review

---
 lib/Utils/Container.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Utils/Container.cpp b/lib/Utils/Container.cpp
index 4d0539b7..685de22b 100644
--- a/lib/Utils/Container.cpp
+++ b/lib/Utils/Container.cpp
@@ -55,7 +55,7 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) {
   }
   setStrides();
   size = product(sizes);
-  auto data = new T[size]{};
+  T *data = new T[size]{};
   aligned = data;
   allocated = data;
   std::fill(data, data + size, init);
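One piece of background that ties the series together: with the -convert-std-to-llvm='emit-c-wrappers=1' option used in the CMake pipeline, every _mlir_ciface_* entry point exchanges memrefs as plain descriptor structs, and the _with_return variants hand such a descriptor back by value. MemRef<T, N> can serve as the C++ view of that descriptor only because its members are declared in the same order. The struct below is an illustrative sketch of the rank-4 f32 descriptor (the struct and its name are ours, not code from this patch series), shown to make the correspondence with Container.h explicit.

#include <cstdint>

// Layout the MLIR C wrappers expect for a rank-4 f32 memref; MemRef<float, 4>
// mirrors it member for member: allocated, aligned, offset, sizes, strides.
struct MemRefDescriptor4DF32 {
  float *allocated;    // pointer returned by the allocation
  float *aligned;      // pointer the generated code actually indexes
  intptr_t offset;     // offset (in elements) of the first element
  intptr_t sizes[4];   // dimension sizes, e.g. {1, 4, 5, 7} for the output
  intptr_t strides[4]; // row-major strides, e.g. {140, 35, 7, 1}
};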