buddy-compiler · hharryz · Mar 3, 2025 · Mar 7, 2025 · Mar 7, 2025 · Mar 7, 2025
diff --git a/examples/DIPDialect/CMakeLists.txt b/examples/DIPDialect/CMakeLists.txt
@@ -1,4 +1,5 @@
-set(DIP_LIBS ${JPEG_LIBRARY} ${PNG_LIBRARIES} BuddyLibDIP)
+set(DIP_LIBS ${JPEG_LIBRARY} ${PNG_LIBRARIES} BuddyLibDIP omp) # `omp` is passed to the linker by bare name; presumably the OpenMP runtime required now that DIP.o is lowered with -convert-scf-to-openmp -- TODO confirm.
+link_directories(${LLVM_LIBS}) # Directory-scoped: extends the library search path of targets created after this call; presumably where the `omp` library above is found -- TODO confirm.
 
 if(BUDDY_ENABLE_OPENCV)
   find_package(OpenCV REQUIRED CONFIG)

diff --git a/frontend/Interfaces/lib/CMakeLists.txt b/frontend/Interfaces/lib/CMakeLists.txt
@@ -17,16 +17,23 @@ endif ()
 add_custom_command(OUTPUT DIP.o
         COMMAND ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DIP.mlir
         -lower-dip="DIP-strip-mining=${SPLITING_SIZE}"
-        -arith-expand
+        -affine-parallelize
         -lower-affine
-        -convert-scf-to-cf
-        -convert-math-to-llvm
+        -convert-scf-to-openmp
+        -convert-vector-to-scf
         -convert-vector-to-llvm
+        -memref-expand
+        -arith-expand
+        -convert-arith-to-llvm
         -finalize-memref-to-llvm
+        -convert-scf-to-cf
+        -convert-openmp-to-llvm
+        -convert-math-to-llvm
         -convert-func-to-llvm
         -reconcile-unrealized-casts |
         ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
         ${LLVM_TOOLS_BINARY_DIR}/llc
+        -relocation-model=pic
         -mtriple=${BUDDY_TARGET_TRIPLE}
         -mattr=${BUDDY_OPT_ATTR}
         --filetype=obj
@@ -47,8 +54,8 @@ SET_TARGET_PROPERTIES(BuddyLibDIP PROPERTIES
 
 add_custom_command(
   OUTPUT DAP.o
-  COMMAND 
-    ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir 
+  COMMAND
+    ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir
       -lower-dap="DAP-vector-splitting=${SPLITING_SIZE}"
       --convert-linalg-to-affine-loops
       -arith-expand
@@ -59,9 +66,9 @@ add_custom_command(
       -finalize-memref-to-llvm
       -llvm-request-c-wrappers
       -convert-func-to-llvm
-      -reconcile-unrealized-casts | 
+      -reconcile-unrealized-casts |
     ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
-    ${LLVM_TOOLS_BINARY_DIR}/llc 
+    ${LLVM_TOOLS_BINARY_DIR}/llc
       -mtriple=${BUDDY_TARGET_TRIPLE}
       -mattr=${BUDDY_OPT_ATTR}
       --filetype=obj
@@ -71,25 +78,25 @@ add_custom_command(
 
 add_custom_command(
   OUTPUT DAP-extend.o
-  COMMAND 
-    ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir 
+  COMMAND
+    ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir
       -extend-dap
       -one-shot-bufferize
       -convert-linalg-to-loops
       -convert-scf-to-cf
       -expand-strided-metadata
       -lower-affine
-      -convert-vector-to-llvm 
-      -memref-expand 
+      -convert-vector-to-llvm
+      -memref-expand
       -arith-expand
       -convert-arith-to-llvm
-      -finalize-memref-to-llvm 
+      -finalize-memref-to-llvm
       -convert-math-to-llvm
       -llvm-request-c-wrappers
       -convert-func-to-llvm
-      -reconcile-unrealized-casts | 
+      -reconcile-unrealized-casts |
     ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
-    ${LLVM_TOOLS_BINARY_DIR}/llc 
+    ${LLVM_TOOLS_BINARY_DIR}/llc
       -mtriple=${BUDDY_TARGET_TRIPLE}
       -mattr=${BUDDY_OPT_ATTR}
       -filetype=obj -relocation-model=pic
@@ -99,7 +106,7 @@ add_custom_command(
 
 add_custom_command(
   OUTPUT DAPVectorization.o
-  COMMAND 
+  COMMAND
     cat ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir |
     sed -e 's/@buddy_fir/@buddy_fir_vectorization/'
         -e 's/@buddy_iir/@buddy_iir_vectorization/'
@@ -115,19 +122,19 @@ add_custom_command(
       -finalize-memref-to-llvm
       -llvm-request-c-wrappers
       -convert-func-to-llvm
-      -reconcile-unrealized-casts | 
+      -reconcile-unrealized-casts |
     ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
-    ${LLVM_TOOLS_BINARY_DIR}/llc 
+    ${LLVM_TOOLS_BINARY_DIR}/llc
       -mtriple=${BUDDY_TARGET_TRIPLE}
       -mattr=${BUDDY_OPT_ATTR}
       -filetype=obj
       -o ${CMAKE_CURRENT_BINARY_DIR}/DAPVectorization.o
   DEPENDS mlir-translate llc buddy-opt
 )
 
-add_library(BuddyLibDAP STATIC 
-  DAP.o 
-  DAP-extend.o 
+add_library(BuddyLibDAP STATIC
+  DAP.o
+  DAP-extend.o
   DAPVectorization.o
 )
 

diff --git a/midend/include/Utils/DIPUtils.h b/midend/include/Utils/DIPUtils.h
@@ -178,14 +178,11 @@ void fillPixelsBilinearInterpolate4D(
 
 // Helper function for resizing an image using nearest neighbour interpolation
 // mechanism.
-void NearestNeighbourInterpolationResizing(
-    OpBuilder &builder, Location loc, MLIRContext *ctx,
-    SmallVector<Value, 8> lowerBounds, SmallVector<Value, 8> upperBounds,
-    SmallVector<int64_t, 8> steps, Value strideVal, Value input, Value output,
-    Value horizontalScalingFactorVec, Value verticalScalingFactorVec,
-    Value outputRowLastElemF32, Value outputColLastElemF32,
-    Value inputRowLastElemF32, Value inputColLastElemF32, VectorType vectorTy32,
-    int64_t stride, Value c0, Value c0F32);
+void NearestNeighbourInterpolationResizing(OpBuilder &builder, Location loc,
+                                           MLIRContext *ctx, Value input,
+                                           Value output, int64_t stride,
+                                           Value horizontalScalingFactor,
+                                           Value verticalScalingFactor);
 
 // Helper function for resizing 4D an image using nearest neighbour
 // interpolation mechanism.
@@ -200,13 +197,11 @@ void NearestNeighbourInterpolationResizing4D(
 
 // Helper function for resizing an image using bilinear interpolation mechanism.
 void BilinearInterpolationResizing(
-    OpBuilder &builder, Location loc, MLIRContext *ctx,
-    SmallVector<Value, 8> lowerBounds, SmallVector<Value, 8> upperBounds,
-    SmallVector<int64_t, 8> steps, Value strideVal, Value input, Value output,
-    Value horizontalScalingFactorVec, Value verticalScalingFactorVec,
-    Value outputRowLastElemF32, Value outputColLastElemF32,
-    Value inputRowLastElemF32, Value inputColLastElemF32, VectorType vectorTy32,
-    int64_t stride, Value c0, Value c0F32, Value c1F32);
+    OpBuilder &builder, Location loc, MLIRContext *ctx, Value input,
+    Value output, int64_t stride, Value horizontalScalingFactor,
+    Value verticalScalingFactor, Value halfVec, Value shiftVec, Value scaleVec,
+    VectorType vectorResTy, VectorType vectorTyI32, VectorType vectorTyI16,
+    VectorType vectorTyIndex, VectorType vectorTyF32, VectorType vectorTyI1);
 
 // Helper function for resizing 4D an image using bilinear interpolation
 // mechanism.

diff --git a/midend/lib/Conversion/LowerDIP/LowerDIPPass.cpp b/midend/lib/Conversion/LowerDIP/LowerDIPPass.cpp
@@ -300,7 +300,6 @@ class DIPResize2DOpLowering : public OpRewritePattern<dip::Resize2DOp> {
     Value verticalScalingFactor = op->getOperand(2);
     Value output = op->getOperand(3);
     auto interpolationAttr = op.getInterpolationType();
-    Value strideVal = rewriter.create<arith::ConstantIndexOp>(loc, stride);
 
     auto inElemTy = input.getType().cast<MemRefType>().getElementType();
     dip::DIP_ERROR error =
@@ -314,85 +313,45 @@ class DIPResize2DOpLowering : public OpRewritePattern<dip::Resize2DOp> {
                                << inElemTy << "is passed";
     }
 
-    Value c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value c1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    Value c0F32 = indexToF32(rewriter, loc, c0);
-
-    Value inputRow = rewriter.create<memref::DimOp>(loc, input, c0);
-    Value inputCol = rewriter.create<memref::DimOp>(loc, input, c1);
-
-    Value outputRow = rewriter.create<memref::DimOp>(loc, output, c0);
-    Value outputCol = rewriter.create<memref::DimOp>(loc, output, c1);
-
-    // Determine lower bound for second call of resize function (this is done
-    // for efficient tail processing).
-    Value outputColStrideRatio =
-        rewriter.create<arith::DivUIOp>(loc, outputCol, strideVal);
-    Value outputColMultiple =
-        rewriter.create<arith::MulIOp>(loc, strideVal, outputColStrideRatio);
-
-    SmallVector<Value, 8> lowerBounds1{c0, c0};
-    SmallVector<Value, 8> upperBounds1{outputRow, outputColMultiple};
-
-    SmallVector<int64_t, 8> steps{1, stride};
-    Value strideTailVal =
-        rewriter.create<arith::SubIOp>(loc, outputCol, outputColMultiple);
-
-    SmallVector<Value, 8> lowerBounds2{c0, outputColMultiple};
-    SmallVector<Value, 8> upperBounds2{outputRow, outputCol};
-
-    FloatType f32 = FloatType::getF32(ctx);
-    VectorType vectorTy32 = VectorType::get({stride}, f32);
-
-    Value horizontalScalingFactorVec = rewriter.create<vector::SplatOp>(
-        loc, vectorTy32, horizontalScalingFactor);
-    Value verticalScalingFactorVec = rewriter.create<vector::SplatOp>(
-        loc, vectorTy32, verticalScalingFactor);
-
-    // Obtain extreme allocatable value(s) in input and output for bounding
-    // purpose.
-    Value inputRowLastElem = rewriter.create<arith::SubIOp>(loc, inputRow, c1);
-    Value inputRowLastElemF32 = indexToF32(rewriter, loc, inputRowLastElem);
-
-    Value inputColLastElem = rewriter.create<arith::SubIOp>(loc, inputCol, c1);
-    Value inputColLastElemF32 = indexToF32(rewriter, loc, inputColLastElem);
-
-    Value outputRowLastElem =
-        rewriter.create<arith::SubIOp>(loc, outputRow, c1);
-    Value outputRowLastElemF32 = indexToF32(rewriter, loc, outputRowLastElem);
-
-    Value outputColLastElem =
-        rewriter.create<arith::SubIOp>(loc, outputCol, c1);
-    Value outputColLastElemF32 = indexToF32(rewriter, loc, outputColLastElem);
+    VectorType vectorTyI1 = VectorType::get({stride}, rewriter.getI1Type());
+    VectorType vectorTyI16 =
+        VectorType::get({stride}, IntegerType::get(rewriter.getContext(), 16));
+    VectorType vectorTyI32 =
+        VectorType::get({stride}, IntegerType::get(rewriter.getContext(), 32));
+    VectorType vectorTyF32 =
+        VectorType::get({stride}, FloatType::getF32(rewriter.getContext()));
+    VectorType vectorTyIndex =
+        VectorType::get({stride}, rewriter.getIndexType());
+    VectorType vectorResTy = VectorType::get(
+        {stride}, inElemTy.isF32() ? FloatType::getF32(rewriter.getContext())
+                                   : FloatType::getF64(rewriter.getContext()));
+
+    static const int SHIFT = 11;
+    static const int HALF = 1 << (SHIFT - 1);
+    static const int INTER_RESIZE_COEF_SCALE = 1 << SHIFT;
+    Value half =
+        rewriter.create<arith::ConstantIntOp>(loc, HALF, rewriter.getI32Type());
+    Value halfVec = rewriter.create<vector::SplatOp>(loc, vectorTyI32, half);
+    Value shift = rewriter.create<arith::ConstantIntOp>(loc, SHIFT,
+                                                        rewriter.getI32Type());
+    Value shiftVec = rewriter.create<vector::SplatOp>(loc, vectorTyI32, shift);
+    Value scaleVec = rewriter.create<vector::SplatOp>(
+        loc, vectorTyF32,
+        rewriter.create<arith::ConstantFloatOp>(
+            loc, (llvm::APFloat)(float)INTER_RESIZE_COEF_SCALE,
+            rewriter.getF32Type()));
 
     if (interpolationAttr ==
         dip::InterpolationType::NearestNeighbourInterpolation) {
       dip::NearestNeighbourInterpolationResizing(
-          rewriter, loc, ctx, lowerBounds1, upperBounds1, steps, strideVal,
-          input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
-          outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
-          inputColLastElemF32, vectorTy32, stride, c0, c0F32);
-
-      dip::NearestNeighbourInterpolationResizing(
-          rewriter, loc, ctx, lowerBounds2, upperBounds2, steps, strideTailVal,
-          input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
-          outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
-          inputColLastElemF32, vectorTy32, stride, c0, c0F32);
+          rewriter, loc, ctx, input, output, stride, horizontalScalingFactor,
+          verticalScalingFactor);
     } else if (interpolationAttr ==
                dip::InterpolationType::BilinearInterpolation) {
-      Value c1F32 = indexToF32(rewriter, loc, c1);
-
       dip::BilinearInterpolationResizing(
-          rewriter, loc, ctx, lowerBounds1, upperBounds1, steps, strideVal,
-          input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
-          outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
-          inputColLastElemF32, vectorTy32, stride, c0, c0F32, c1F32);
-
-      dip::BilinearInterpolationResizing(
-          rewriter, loc, ctx, lowerBounds2, upperBounds2, steps, strideTailVal,
-          input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
-          outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
-          inputColLastElemF32, vectorTy32, stride, c0, c0F32, c1F32);
+          rewriter, loc, ctx, input, output, stride, horizontalScalingFactor,
+          verticalScalingFactor, halfVec, shiftVec, scaleVec, vectorResTy,
+          vectorTyI32, vectorTyI16, vectorTyIndex, vectorTyF32, vectorTyI1);
     }
 
     // Remove the original resize operation.