Skip to content

[Midend] DIP Resize Op Acceleration #483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion examples/DIPDialect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(DIP_LIBS ${JPEG_LIBRARY} ${PNG_LIBRARIES} BuddyLibDIP)
set(DIP_LIBS ${JPEG_LIBRARY} ${PNG_LIBRARIES} BuddyLibDIP omp)
link_directories(${LLVM_LIBS})

if(BUDDY_ENABLE_OPENCV)
find_package(OpenCV REQUIRED CONFIG)
Expand Down
47 changes: 27 additions & 20 deletions frontend/Interfaces/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,23 @@ endif ()
add_custom_command(OUTPUT DIP.o
COMMAND ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DIP.mlir
-lower-dip="DIP-strip-mining=${SPLITING_SIZE}"
-arith-expand
-affine-parallelize
-lower-affine
-convert-scf-to-cf
-convert-math-to-llvm
-convert-scf-to-openmp
-convert-vector-to-scf
-convert-vector-to-llvm
-memref-expand
-arith-expand
-convert-arith-to-llvm
-finalize-memref-to-llvm
-convert-scf-to-cf
-convert-openmp-to-llvm
-convert-math-to-llvm
-convert-func-to-llvm
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llc
-relocation-model=pic
-mtriple=${BUDDY_TARGET_TRIPLE}
-mattr=${BUDDY_OPT_ATTR}
--filetype=obj
Expand All @@ -47,8 +54,8 @@ SET_TARGET_PROPERTIES(BuddyLibDIP PROPERTIES

add_custom_command(
OUTPUT DAP.o
COMMAND
${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir
COMMAND
${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir
-lower-dap="DAP-vector-splitting=${SPLITING_SIZE}"
--convert-linalg-to-affine-loops
-arith-expand
Expand All @@ -59,9 +66,9 @@ add_custom_command(
-finalize-memref-to-llvm
-llvm-request-c-wrappers
-convert-func-to-llvm
-reconcile-unrealized-casts |
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llc
${LLVM_TOOLS_BINARY_DIR}/llc
-mtriple=${BUDDY_TARGET_TRIPLE}
-mattr=${BUDDY_OPT_ATTR}
--filetype=obj
Expand All @@ -71,25 +78,25 @@ add_custom_command(

add_custom_command(
OUTPUT DAP-extend.o
COMMAND
${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir
COMMAND
${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir
-extend-dap
-one-shot-bufferize
-convert-linalg-to-loops
-convert-scf-to-cf
-expand-strided-metadata
-lower-affine
-convert-vector-to-llvm
-memref-expand
-convert-vector-to-llvm
-memref-expand
-arith-expand
-convert-arith-to-llvm
-finalize-memref-to-llvm
-finalize-memref-to-llvm
-convert-math-to-llvm
-llvm-request-c-wrappers
-convert-func-to-llvm
-reconcile-unrealized-casts |
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llc
${LLVM_TOOLS_BINARY_DIR}/llc
-mtriple=${BUDDY_TARGET_TRIPLE}
-mattr=${BUDDY_OPT_ATTR}
-filetype=obj -relocation-model=pic
Expand All @@ -99,7 +106,7 @@ add_custom_command(

add_custom_command(
OUTPUT DAPVectorization.o
COMMAND
COMMAND
cat ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir |
sed -e 's/@buddy_fir/@buddy_fir_vectorization/'
-e 's/@buddy_iir/@buddy_iir_vectorization/'
Expand All @@ -115,19 +122,19 @@ add_custom_command(
-finalize-memref-to-llvm
-llvm-request-c-wrappers
-convert-func-to-llvm
-reconcile-unrealized-casts |
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llc
${LLVM_TOOLS_BINARY_DIR}/llc
-mtriple=${BUDDY_TARGET_TRIPLE}
-mattr=${BUDDY_OPT_ATTR}
-filetype=obj
-o ${CMAKE_CURRENT_BINARY_DIR}/DAPVectorization.o
DEPENDS mlir-translate llc buddy-opt
)

add_library(BuddyLibDAP STATIC
DAP.o
DAP-extend.o
add_library(BuddyLibDAP STATIC
DAP.o
DAP-extend.o
DAPVectorization.o
)

Expand Down
25 changes: 10 additions & 15 deletions midend/include/Utils/DIPUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,11 @@ void fillPixelsBilinearInterpolate4D(

// Helper function for resizing an image using nearest neighbour interpolation
// mechanism.
void NearestNeighbourInterpolationResizing(
OpBuilder &builder, Location loc, MLIRContext *ctx,
SmallVector<Value, 8> lowerBounds, SmallVector<Value, 8> upperBounds,
SmallVector<int64_t, 8> steps, Value strideVal, Value input, Value output,
Value horizontalScalingFactorVec, Value verticalScalingFactorVec,
Value outputRowLastElemF32, Value outputColLastElemF32,
Value inputRowLastElemF32, Value inputColLastElemF32, VectorType vectorTy32,
int64_t stride, Value c0, Value c0F32);
void NearestNeighbourInterpolationResizing(OpBuilder &builder, Location loc,
MLIRContext *ctx, Value input,
Value output, int64_t stride,
Value horizontalScalingFactor,
Value verticalScalingFactor);

// Helper function for resizing a 4D image using nearest neighbour
// interpolation mechanism.
Expand All @@ -200,13 +197,11 @@ void NearestNeighbourInterpolationResizing4D(

// Helper function for resizing an image using bilinear interpolation mechanism.
void BilinearInterpolationResizing(
OpBuilder &builder, Location loc, MLIRContext *ctx,
SmallVector<Value, 8> lowerBounds, SmallVector<Value, 8> upperBounds,
SmallVector<int64_t, 8> steps, Value strideVal, Value input, Value output,
Value horizontalScalingFactorVec, Value verticalScalingFactorVec,
Value outputRowLastElemF32, Value outputColLastElemF32,
Value inputRowLastElemF32, Value inputColLastElemF32, VectorType vectorTy32,
int64_t stride, Value c0, Value c0F32, Value c1F32);
OpBuilder &builder, Location loc, MLIRContext *ctx, Value input,
Value output, int64_t stride, Value horizontalScalingFactor,
Value verticalScalingFactor, Value halfVec, Value shiftVec, Value scaleVec,
VectorType vectorResTy, VectorType vectorTyI32, VectorType vectorTyI16,
VectorType vectorTyIndex, VectorType vectorTyF32, VectorType vectorTyI1);

// Helper function for resizing a 4D image using bilinear interpolation
// mechanism.
Expand Down
105 changes: 32 additions & 73 deletions midend/lib/Conversion/LowerDIP/LowerDIPPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ class DIPResize2DOpLowering : public OpRewritePattern<dip::Resize2DOp> {
Value verticalScalingFactor = op->getOperand(2);
Value output = op->getOperand(3);
auto interpolationAttr = op.getInterpolationType();
Value strideVal = rewriter.create<arith::ConstantIndexOp>(loc, stride);

auto inElemTy = input.getType().cast<MemRefType>().getElementType();
dip::DIP_ERROR error =
Expand All @@ -314,85 +313,45 @@ class DIPResize2DOpLowering : public OpRewritePattern<dip::Resize2DOp> {
<< inElemTy << "is passed";
}

Value c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
Value c1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
Value c0F32 = indexToF32(rewriter, loc, c0);

Value inputRow = rewriter.create<memref::DimOp>(loc, input, c0);
Value inputCol = rewriter.create<memref::DimOp>(loc, input, c1);

Value outputRow = rewriter.create<memref::DimOp>(loc, output, c0);
Value outputCol = rewriter.create<memref::DimOp>(loc, output, c1);

// Determine lower bound for second call of resize function (this is done
// for efficient tail processing).
Value outputColStrideRatio =
rewriter.create<arith::DivUIOp>(loc, outputCol, strideVal);
Value outputColMultiple =
rewriter.create<arith::MulIOp>(loc, strideVal, outputColStrideRatio);

SmallVector<Value, 8> lowerBounds1{c0, c0};
SmallVector<Value, 8> upperBounds1{outputRow, outputColMultiple};

SmallVector<int64_t, 8> steps{1, stride};
Value strideTailVal =
rewriter.create<arith::SubIOp>(loc, outputCol, outputColMultiple);

SmallVector<Value, 8> lowerBounds2{c0, outputColMultiple};
SmallVector<Value, 8> upperBounds2{outputRow, outputCol};

FloatType f32 = FloatType::getF32(ctx);
VectorType vectorTy32 = VectorType::get({stride}, f32);

Value horizontalScalingFactorVec = rewriter.create<vector::SplatOp>(
loc, vectorTy32, horizontalScalingFactor);
Value verticalScalingFactorVec = rewriter.create<vector::SplatOp>(
loc, vectorTy32, verticalScalingFactor);

// Obtain extreme allocatable value(s) in input and output for bounding
// purpose.
Value inputRowLastElem = rewriter.create<arith::SubIOp>(loc, inputRow, c1);
Value inputRowLastElemF32 = indexToF32(rewriter, loc, inputRowLastElem);

Value inputColLastElem = rewriter.create<arith::SubIOp>(loc, inputCol, c1);
Value inputColLastElemF32 = indexToF32(rewriter, loc, inputColLastElem);

Value outputRowLastElem =
rewriter.create<arith::SubIOp>(loc, outputRow, c1);
Value outputRowLastElemF32 = indexToF32(rewriter, loc, outputRowLastElem);

Value outputColLastElem =
rewriter.create<arith::SubIOp>(loc, outputCol, c1);
Value outputColLastElemF32 = indexToF32(rewriter, loc, outputColLastElem);
VectorType vectorTyI1 = VectorType::get({stride}, rewriter.getI1Type());
VectorType vectorTyI16 =
VectorType::get({stride}, IntegerType::get(rewriter.getContext(), 16));
VectorType vectorTyI32 =
VectorType::get({stride}, IntegerType::get(rewriter.getContext(), 32));
VectorType vectorTyF32 =
VectorType::get({stride}, FloatType::getF32(rewriter.getContext()));
VectorType vectorTyIndex =
VectorType::get({stride}, rewriter.getIndexType());
VectorType vectorResTy = VectorType::get(
{stride}, inElemTy.isF32() ? FloatType::getF32(rewriter.getContext())
: FloatType::getF64(rewriter.getContext()));

static const int SHIFT = 11;
static const int HALF = 1 << (SHIFT - 1);
static const int INTER_RESIZE_COEF_SCALE = 1 << SHIFT;
Value half =
rewriter.create<arith::ConstantIntOp>(loc, HALF, rewriter.getI32Type());
Value halfVec = rewriter.create<vector::SplatOp>(loc, vectorTyI32, half);
Value shift = rewriter.create<arith::ConstantIntOp>(loc, SHIFT,
rewriter.getI32Type());
Value shiftVec = rewriter.create<vector::SplatOp>(loc, vectorTyI32, shift);
Value scaleVec = rewriter.create<vector::SplatOp>(
loc, vectorTyF32,
rewriter.create<arith::ConstantFloatOp>(
loc, (llvm::APFloat)(float)INTER_RESIZE_COEF_SCALE,
rewriter.getF32Type()));

if (interpolationAttr ==
dip::InterpolationType::NearestNeighbourInterpolation) {
dip::NearestNeighbourInterpolationResizing(
rewriter, loc, ctx, lowerBounds1, upperBounds1, steps, strideVal,
input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
inputColLastElemF32, vectorTy32, stride, c0, c0F32);

dip::NearestNeighbourInterpolationResizing(
rewriter, loc, ctx, lowerBounds2, upperBounds2, steps, strideTailVal,
input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
inputColLastElemF32, vectorTy32, stride, c0, c0F32);
rewriter, loc, ctx, input, output, stride, horizontalScalingFactor,
verticalScalingFactor);
} else if (interpolationAttr ==
dip::InterpolationType::BilinearInterpolation) {
Value c1F32 = indexToF32(rewriter, loc, c1);

dip::BilinearInterpolationResizing(
rewriter, loc, ctx, lowerBounds1, upperBounds1, steps, strideVal,
input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
inputColLastElemF32, vectorTy32, stride, c0, c0F32, c1F32);

dip::BilinearInterpolationResizing(
rewriter, loc, ctx, lowerBounds2, upperBounds2, steps, strideTailVal,
input, output, horizontalScalingFactorVec, verticalScalingFactorVec,
outputRowLastElemF32, outputColLastElemF32, inputRowLastElemF32,
inputColLastElemF32, vectorTy32, stride, c0, c0F32, c1F32);
rewriter, loc, ctx, input, output, stride, horizontalScalingFactor,
verticalScalingFactor, halfVec, shiftVec, scaleVec, vectorResTy,
vectorTyI32, vectorTyI16, vectorTyIndex, vectorTyF32, vectorTyI1);
}

// Remove the original resize operation.
Expand Down
Loading