Skip to content

Commit e6a3cc5

Browse files
committed
Add Halide Conv Layer Benchmark.
1 parent c7a1e30 commit e6a3cc5

9 files changed

+497
-0
lines changed

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ if(DEFINED IMAGE_PROCESSING_BENCHMARKS OR DEEP_LEARNING_BENCHMARKS OR OP_OPTIMIZ
8989
include_directories(${OpenCV_INCLUDE_DIRS})
9090
endif()
9191

92+
#-------------------------------------------------------------------------------
93+
# Find Halide
94+
#-------------------------------------------------------------------------------
95+
96+
if(DEFINED DEEP_LEARNING_BENCHMARKS)
97+
find_package(Halide REQUIRED)
98+
endif()
99+
92100
#-------------------------------------------------------------------------------
93101
# Find PNG
94102
#-------------------------------------------------------------------------------

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ $ cd bin && ./image-processing-benchmark <image path> <kernel name> <kernelmorph
8282

8383
## Deep Learning Benchmark
8484

85+
Currently, the deep learning benchmark includes the following frameworks or optimizers:
86+
87+
- Halide ([link](https://github.com/halide/Halide/blob/main/README_cmake.md))
88+
89+
*NOTE: Please build Halide 15.0.0 from source to achieve the best performance.*
90+
8591
| CMake Options | Default Value |
8692
| -------------- | ------------- |
8793
| `-DBUDDY_OPT_ATTR` | avx512f |
@@ -97,6 +103,7 @@ $ cmake -G Ninja .. \
97103
-DDEEP_LEARNING_BENCHMARKS=ON \
98104
-DCMAKE_BUILD_TYPE=RELEASE \
99105
-DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \
106+
-DCMAKE_PREFIX_PATH=$PWD/../thirdparty/Halide/Halide-install/ \
100107
-DBUDDY_MLIR_BUILD_DIR=/PATH/TO/BUDDY-MLIR/BUILD/
101108
$ ninja
102109
```
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
add_subdirectory(Layers)
12
add_subdirectory(Models)
23
add_subdirectory(Ops)
34

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#-------------------------------------------------------------------------------
# Generate Non-Scheduled Version Conv Layer Static Library
#-------------------------------------------------------------------------------

add_halide_generator(conv_layer_nonschedule.generator SOURCES conv_layer_generator.cpp)
add_halide_library(conv_layer_nonschedule FROM conv_layer_nonschedule.generator)

#-------------------------------------------------------------------------------
# Generate Auto-Scheduled Version Conv Layer Static Library
#-------------------------------------------------------------------------------

add_halide_generator(conv_layer_autoschedule.generator SOURCES conv_layer_generator-autoschedule.cpp)
add_halide_library(conv_layer_autoschedule FROM conv_layer_autoschedule.generator
                   AUTOSCHEDULER Halide::Mullapudi2016)

#-------------------------------------------------------------------------------
# Generate Manually-Scheduled Version Conv Layer Static Library
#-------------------------------------------------------------------------------

add_halide_generator(conv_layer_manually.generator SOURCES conv_layer_generator-manually.cpp)
add_halide_library(conv_layer_manuallyschedule FROM conv_layer_manually.generator)

#-------------------------------------------------------------------------------
# Halide ConvLayer Benchmark Target
#-------------------------------------------------------------------------------

add_executable(halide-convlayer-benchmark
  Main.cpp
  HalideConvLayerBenchmark.cpp)

# Remove the generator executables once the benchmark has been linked.
# `-f` keeps the step from failing the build when the glob matches nothing
# (for example when the generators were already removed by an earlier build).
add_custom_command(TARGET halide-convlayer-benchmark POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/*.generator
  COMMENT "Cleaning up all generator files")

target_link_libraries(halide-convlayer-benchmark
  GoogleBenchmark
  Halide::ImageIO
  conv_layer_nonschedule
  conv_layer_manuallyschedule
  conv_layer_autoschedule)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#include <chrono>
2+
#include <cstdio>
3+
4+
#include "conv_layer_nonschedule.h"
5+
#include "conv_layer_manuallyschedule.h"
6+
#include "conv_layer_autoschedule.h"
7+
#include <benchmark/benchmark.h>
8+
#include "HalideBuffer.h"
9+
10+
using namespace Halide::Runtime;
11+
12+
const int N = 5, CI = 128, CO = 128, W = 100, H = 80;
13+
14+
Buffer<float, 4> input(CI, W + 2, H + 2, N), input1(CI, W + 2, H + 2, N), input2(CI, W + 2, H + 2, N);
15+
Buffer<float, 4> filter(CO, 3, 3, CI), filter1(CO, 3, 3, CI), filter2(CO, 3, 3, CI);
16+
Buffer<float, 1> bias(CO), bias1(CO), bias2(CO);
17+
Buffer<float, 4> output(CO, W, H, N), output1(CO, W, H, N), output2(CO, W, H, N);
18+
19+
void initializeHalideConvLayerBenchmark(char **argv) {
20+
for (int c = 0; c < input.dim(3).extent(); c++) {
21+
for (int z = 0; z < input.channels(); z++) {
22+
for (int y = 0; y < input.height(); y++) {
23+
for (int x = 0; x < input.width(); x++) {
24+
input(x, y, z, c) = rand();
25+
input1(x, y, z, c) = input(x, y, z, c);
26+
input2(x, y, z, c) = input(x, y, z, c);
27+
}
28+
}
29+
}
30+
}
31+
32+
for (int c = 0; c < filter.dim(3).extent(); c++) {
33+
for (int z = 0; z < filter.channels(); z++) {
34+
for (int y = 0; y < filter.height(); y++) {
35+
for (int x = 0; x < filter.width(); x++) {
36+
filter(x, y, z, c) = rand();
37+
filter1(x, y, z, c) = filter(x, y, z, c);
38+
filter2(x, y, z, c) = filter(x, y, z, c);
39+
}
40+
}
41+
}
42+
}
43+
44+
for (int x = 0; x < bias.width(); x++) {
45+
bias(x) = rand();
46+
bias1(x) = bias(x);
47+
bias2(x) = bias(x);
48+
}
49+
50+
#ifdef _WIN32
51+
_putenv_s("HL_CUDA_JIT_MAX_REGISTERS", "256");
52+
#else
53+
setenv("HL_CUDA_JIT_MAX_REGISTERS", "256", 1);
54+
#endif
55+
}
56+
57+
static void Halide_ConvLayer_NonSchedule(benchmark::State &state) {
58+
for (auto _ : state) {
59+
for (int i = 0; i < state.range(0); ++i) {
60+
conv_layer_nonschedule(input, filter, bias, output);
61+
}
62+
}
63+
}
64+
65+
static void Halide_ConvLayer_MaunallySchedule(benchmark::State &state) {
66+
for (auto _ : state) {
67+
for (int i = 0; i < state.range(0); ++i) {
68+
conv_layer_manuallyschedule(input1, filter1, bias1, output1);
69+
}
70+
}
71+
}
72+
73+
static void Halide_ConvLayer_AutoSchedule(benchmark::State &state) {
74+
for (auto _ : state) {
75+
for (int i = 0; i < state.range(0); ++i) {
76+
conv_layer_autoschedule(input2, filter2, bias2, output2);
77+
}
78+
}
79+
}
80+
81+
// Register benchmarking function.
82+
void registerBenchmarkHalideConvLayer() {
83+
BENCHMARK(Halide_ConvLayer_NonSchedule)->Arg(1)->Unit(benchmark::kMillisecond);
84+
BENCHMARK(Halide_ConvLayer_MaunallySchedule)->Arg(1)->Unit(benchmark::kMillisecond);
85+
BENCHMARK(Halide_ConvLayer_AutoSchedule)->Arg(1)->Unit(benchmark::kMillisecond);
86+
}
87+
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===- Main.cpp -----------------------------------------------------------===//
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
//
17+
// This is the main file of the Halide Conv Layer benchmark.
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#include <benchmark/benchmark.h>
22+
#include <stdexcept>
23+
24+
void initializeHalideConvLayerBenchmark(char **);
25+
26+
void registerBenchmarkHalideConvLayer();
27+
28+
// Run benchmarks.
29+
// Entry point of the Halide conv layer benchmark.
//
// Google Benchmark flags (for example --benchmark_filter) are accepted; any
// argument the library does not recognise is reported and the process exits
// with status 1 instead of terminating through an uncaught exception.
int main(int argc, char **argv) {
  // Initialize() strips the flags it understands from argv, so whatever is
  // left afterwards is an argument this program does not support.
  ::benchmark::Initialize(&argc, argv);
  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
    return 1;
  }

  // Prepare the operand buffers before any benchmark is executed.
  initializeHalideConvLayerBenchmark(argv);

  // Register Benchmark Function.
  registerBenchmarkHalideConvLayer();

  ::benchmark::RunSpecifiedBenchmarks();

  return 0;
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#include "Halide.h"
2+
3+
namespace {
4+
5+
using namespace Halide;
6+
7+
// Halide generator for a 3x3 convolution over CI input channels followed by a
// ReLU, intended to be scheduled by an autoscheduler.
//
// Dimension order of every 4-D buffer is fixed by the bounds below:
//   input  : (CI, W + 2, H + 2, N)
//   filter : (CO, 3, 3, CI)
//   relu   : (CO, W, H, N)
// and `bias` is a 1-D buffer of CO elements.
class ConvolutionLayer : public Halide::Generator<ConvolutionLayer> {
public:
  Input<Buffer<float, 4>> input{"input"};
  Input<Buffer<float, 4>> filter{"filter"};
  Input<Buffer<float, 1>> bias{"bias"};
  Output<Buffer<float, 4>> relu{"relu"};

  void generate() {
    const int N = 5, CI = 128, CO = 128, W = 100, H = 80;

    /* THE ALGORITHM */

    Var x("x"), y("y"), c("c"), n("n");

    Func conv("conv");
    // Reduction over (input channel, filter x, filter y).
    RDom r(0, CI, 0, 3, 0, 3);

    conv(c, x, y, n) = bias(c);
    conv(c, x, y, n) += filter(c, r.y, r.z, r.x) * input(r.x, x + r.y, y + r.z, n);

    relu(c, x, y, n) = max(0, conv(c, x, y, n));

    /* THE SCHEDULE */

    // No explicit schedule is written here; only the exact bounds and dense
    // strides of every buffer are declared.
    relu.dim(0).set_bounds(0, CO).set_stride(1);
    relu.dim(1).set_bounds(0, W).set_stride(CO);
    relu.dim(2).set_bounds(0, H).set_stride(CO * W);
    relu.dim(3).set_bounds(0, N).set_stride(CO * H * W);

    input.dim(0).set_bounds(0, CI).set_stride(1);
    input.dim(1).set_bounds(0, W + 2).set_stride(CI);
    input.dim(2).set_bounds(0, H + 2).set_stride(CI * (W + 2));
    input.dim(3).set_bounds(0, N).set_stride(CI * (W + 2) * (H + 2));

    filter.dim(0).set_bounds(0, CO).set_stride(1);
    filter.dim(1).set_bounds(0, 3).set_stride(CO);
    filter.dim(2).set_bounds(0, 3).set_stride(CO * 3);
    filter.dim(3).set_bounds(0, CI).set_stride(CO * 3 * 3);

    bias.dim(0).set_bounds(0, CO).set_stride(1);

    if (using_autoscheduler()) {
      // Estimates mirror the bounds declared above, dimension by dimension.
      input.dim(0).set_estimate(0, CI);
      input.dim(1).set_estimate(0, W + 2);
      input.dim(2).set_estimate(0, H + 2);
      input.dim(3).set_estimate(0, N);

      filter.dim(0).set_estimate(0, CO);
      filter.dim(1).set_estimate(0, 3);
      filter.dim(2).set_estimate(0, 3);
      filter.dim(3).set_estimate(0, CI);

      bias.dim(0).set_estimate(0, CO);

      // relu is indexed as (c, x, y, n), so its extents are (CO, W, H, N).
      relu.dim(0).set_estimate(0, CO);
      relu.dim(1).set_estimate(0, W);
      relu.dim(2).set_estimate(0, H);
      relu.dim(3).set_estimate(0, N);
    }
  }
};
68+
69+
} // namespace
70+
71+
HALIDE_REGISTER_GENERATOR(ConvolutionLayer, conv_layer_autoschedule)

0 commit comments

Comments
 (0)