Skip to content

Commit 6028c05

Browse files
authored
use Eigen as a BLAS alternative (LeelaChessZero#858)
* import Eigen 3.3.7
1 parent 4134905 commit 6028c05

File tree

308 files changed

+116520
-5
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

308 files changed

+116520
-5
lines changed

meson.build

+5-1
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,11 @@ if get_option('build_backends')
195195
endif
196196

197197
if get_option('blas')
198-
if get_option('mkl') and mkl_lib.found()
198+
if get_option('eigen')
199+
add_project_arguments('-DUSE_EIGEN', language : 'cpp')
200+
has_blas = true
201+
202+
elif get_option('mkl') and mkl_lib.found()
199203
add_project_arguments('-DUSE_MKL', language : 'cpp')
200204
includes += include_directories(get_option('mkl_include'))
201205
deps += [ mkl_lib ]

meson_options.txt

+5
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ option('mkl',
103103
value: true,
104104
description: 'Enable MKL BLAS support')
105105

106+
option('eigen',
107+
type: 'boolean',
108+
value: false,
109+
description: 'Use EIGEN as a BLAS alternative')
110+
106111
option('accelerate',
107112
type: 'boolean',
108113
value: true,

src/neural/blas/convolution1.cc

+19-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,19 @@
1919
#include "neural/blas/convolution1.h"
2020
#include "neural/blas/blas.h"
2121

22+
#ifdef USE_EIGEN
23+
#include <Eigen/Dense>
24+
#endif
25+
2226
namespace lczero {
27+
#ifdef USE_EIGEN
28+
template <typename T>
29+
using EigenMatrixMap =
30+
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
31+
template <typename T>
32+
using ConstEigenMatrixMap =
33+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
34+
#endif
2335

2436
void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
2537
const size_t output_channels, const float* input,
@@ -44,7 +56,7 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
4456

4557
const float* batch_input = input + i * kSquares * input_channels;
4658
float* batch_output = output + i * kSquares * output_channels;
47-
59+
#ifndef USE_EIGEN
4860
cblas_sgemm(CblasRowMajor, // Row major format
4961
CblasNoTrans, // A not transposed
5062
CblasNoTrans, // B not transposed
@@ -59,6 +71,12 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
5971
0.0f, // beta
6072
batch_output, // C
6173
kSquares); // ldc, leading rank of C
74+
#else
75+
auto C_mat = EigenMatrixMap<float>(batch_output, kSquares, output_channels);
76+
C_mat.noalias() =
77+
ConstEigenMatrixMap<float>(batch_input, kSquares, input_channels) *
78+
ConstEigenMatrixMap<float>(weights, input_channels, output_channels);
79+
#endif
6280
}
6381
}
6482

src/neural/blas/fully_connected_layer.cc

+37-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,24 @@
2323
#include <cassert>
2424
#include <cmath>
2525

26+
#ifdef USE_EIGEN
27+
#include <Eigen/Dense>
28+
#endif
29+
2630
namespace lczero {
31+
#ifdef USE_EIGEN
32+
template <typename T>
33+
using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>>;
34+
template <typename T>
35+
using ConstEigenVectorMap =
36+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>;
37+
template <typename T>
38+
using EigenMatrixMap =
39+
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
40+
template <typename T>
41+
using ConstEigenMatrixMap =
42+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
43+
#endif
2744

2845
void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
2946
const size_t output_size,
@@ -41,11 +58,17 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
4158
//
4259
// rows output_size output_size input_size
4360
//
44-
61+
#ifndef USE_EIGEN
4562
cblas_sgemv(CblasRowMajor, CblasNoTrans,
4663
// M K
4764
(int)output_size, (int)input_size, 1.0f, weights,
4865
(int)input_size, inputs, 1, 0.0f, outputs, 1);
66+
#else
67+
EigenVectorMap<float> y(outputs, output_size);
68+
y.noalias() = ConstEigenMatrixMap<float>(weights, input_size, output_size)
69+
.transpose() *
70+
ConstEigenVectorMap<float>(inputs, input_size);
71+
#endif
4972
} else {
5073
// more columns, matrix-matrix multiplication
5174
//
@@ -66,7 +89,7 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
6689
// passing a matrix A[m][n], the value should be m.
6790
// cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
6891
// ldb, beta, C, N);
69-
92+
#ifndef USE_EIGEN
7093
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
7194
(int)output_size, // M
7295
(int)batch_size, // N
@@ -79,6 +102,13 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
79102
0.0f, // beta
80103
outputs, // C
81104
(int)output_size); // ldc, leading rank of C
105+
#else
106+
auto C_mat = EigenMatrixMap<float>(outputs, output_size, batch_size);
107+
C_mat.noalias() =
108+
ConstEigenMatrixMap<float>(weights, input_size, output_size)
109+
.transpose() *
110+
ConstEigenMatrixMap<float>(inputs, input_size, batch_size);
111+
#endif
82112
}
83113
if (apply_relu) {
84114
for (size_t i = 0; i < batch_size; i++) {
@@ -104,7 +134,12 @@ float FullyConnectedLayer::Forward0D(const size_t size, const float* x,
104134
// float cblas_sdot(const int N, const float *X, const int incX, const float
105135
// *Y,
106136
// const int incY);
137+
#ifndef USE_EIGEN
107138
return cblas_sdot((int)size, x, 1, y, 1);
139+
#else
140+
return ConstEigenVectorMap<float>(x, size)
141+
.dot(ConstEigenVectorMap<float>(y, size));
142+
#endif
108143
}
109144

110145
} // namespace lczero

src/neural/blas/network_blas.cc

+11
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
#include <cmath>
3434
#include <iostream>
3535

36+
#ifdef USE_EIGEN
37+
#include <Eigen/Core>
38+
#endif
39+
3640
namespace lczero {
3741
namespace {
3842

@@ -339,7 +343,9 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) {
339343

340344
BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
341345
: weights_(file.weights()) {
346+
#ifndef USE_EIGEN
342347
int blas_cores = options.GetOrDefault<int>("blas_cores", 1);
348+
#endif
343349
max_batch_size_ =
344350
static_cast<size_t>(options.GetOrDefault<int>("batch_size", 256));
345351

@@ -378,6 +384,11 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
378384
pol_channels, channels);
379385
}
380386

387+
#ifdef USE_EIGEN
388+
CERR << "Using Eigen version " << EIGEN_WORLD_VERSION << "."
389+
<< EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION;
390+
#endif
391+
381392
#ifdef USE_OPENBLAS
382393
int num_procs = openblas_get_num_procs();
383394
blas_cores = std::min(num_procs, blas_cores);

src/neural/blas/winograd_convolution3.cc

+21-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,19 @@
2929
#include "winograd_transform_ispc.h"
3030
#endif
3131

32+
#ifdef USE_EIGEN
33+
#include <Eigen/Dense>
34+
#endif
35+
3236
namespace lczero {
37+
#ifdef USE_EIGEN
38+
template <typename T>
39+
using EigenMatrixMap =
40+
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
41+
template <typename T>
42+
using ConstEigenMatrixMap =
43+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
44+
#endif
3345

3446
WinogradConvolution3::WinogradConvolution3(const size_t max_batch_size,
3547
const size_t max_input_layers,
@@ -206,7 +218,7 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,
206218

207219
auto offset_v = b * batch_size * input_channels * kTiles;
208220
auto offset_m = b * batch_size * output_channels * kTiles;
209-
221+
#ifndef USE_EIGEN
210222
cblas_sgemm(CblasColMajor, // Column major format
211223
CblasNoTrans, // A no trans
212224
CblasNoTrans, // B no trans
@@ -220,6 +232,14 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,
220232
(int)input_channels, 0.0f, // ldV
221233
&M_[offset_m], // M
222234
(int)output_channels); // ldM
235+
#else
236+
auto C_mat = EigenMatrixMap<float>(&M_[offset_m], output_channels,
237+
batch_size * kTiles);
238+
C_mat.noalias() = ConstEigenMatrixMap<float>(
239+
&weights[offset_u], output_channels, input_channels) *
240+
ConstEigenMatrixMap<float>(&V_[offset_v], input_channels,
241+
batch_size * kTiles);
242+
#endif
223243
}
224244

225245
#endif

third_party/Eigen/CMakeLists.txt

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
include(RegexUtils)
2+
test_escape_string_as_regex()
3+
4+
file(GLOB Eigen_directory_files "*")
5+
6+
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
7+
8+
foreach(f ${Eigen_directory_files})
9+
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src")
10+
list(APPEND Eigen_directory_files_to_install ${f})
11+
endif()
12+
endforeach(f ${Eigen_directory_files})
13+
14+
install(FILES
15+
${Eigen_directory_files_to_install}
16+
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
17+
)
18+
19+
install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")

third_party/Eigen/Cholesky

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// This file is part of Eigen, a lightweight C++ template library
2+
// for linear algebra.
3+
//
4+
// This Source Code Form is subject to the terms of the Mozilla
5+
// Public License v. 2.0. If a copy of the MPL was not distributed
6+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
7+
8+
#ifndef EIGEN_CHOLESKY_MODULE_H
9+
#define EIGEN_CHOLESKY_MODULE_H
10+
11+
#include "Core"
12+
#include "Jacobi"
13+
14+
#include "src/Core/util/DisableStupidWarnings.h"
15+
16+
/** \defgroup Cholesky_Module Cholesky module
17+
*
18+
*
19+
*
20+
* This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
21+
* Those decompositions are also accessible via the following methods:
22+
* - MatrixBase::llt()
23+
* - MatrixBase::ldlt()
24+
* - SelfAdjointView::llt()
25+
* - SelfAdjointView::ldlt()
26+
*
27+
* \code
28+
* #include <Eigen/Cholesky>
29+
* \endcode
30+
*/
31+
32+
#include "src/Cholesky/LLT.h"
33+
#include "src/Cholesky/LDLT.h"
34+
#ifdef EIGEN_USE_LAPACKE
35+
#ifdef EIGEN_USE_MKL
36+
#include "mkl_lapacke.h"
37+
#else
38+
#include "src/misc/lapacke.h"
39+
#endif
40+
#include "src/Cholesky/LLT_LAPACKE.h"
41+
#endif
42+
43+
#include "src/Core/util/ReenableStupidWarnings.h"
44+
45+
#endif // EIGEN_CHOLESKY_MODULE_H
46+
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

third_party/Eigen/CholmodSupport

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// This file is part of Eigen, a lightweight C++ template library
2+
// for linear algebra.
3+
//
4+
// This Source Code Form is subject to the terms of the Mozilla
5+
// Public License v. 2.0. If a copy of the MPL was not distributed
6+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
7+
8+
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
9+
#define EIGEN_CHOLMODSUPPORT_MODULE_H
10+
11+
#include "SparseCore"
12+
13+
#include "src/Core/util/DisableStupidWarnings.h"
14+
15+
extern "C" {
16+
#include <cholmod.h>
17+
}
18+
19+
/** \ingroup Support_modules
20+
* \defgroup CholmodSupport_Module CholmodSupport module
21+
*
22+
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
23+
* It provides the two following main factorization classes:
24+
* - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
25+
* - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
26+
*
27+
* For the sake of completeness, this module also provides the following two classes:
28+
* - class CholmodSimplicialLLT
29+
* - class CholmodSimplicialLDLT
30+
* Note that these classes do not bring any particular advantage compared to the built-in
31+
* SimplicialLLT and SimplicialLDLT factorization classes.
32+
*
33+
* \code
34+
* #include <Eigen/CholmodSupport>
35+
* \endcode
36+
*
37+
* In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies.
38+
* The dependencies depend on how cholmod has been compiled.
39+
* For a cmake based project, you can use our FindCholmod.cmake module to help you in this task.
40+
*
41+
*/
42+
43+
#include "src/CholmodSupport/CholmodSupport.h"
44+
45+
#include "src/Core/util/ReenableStupidWarnings.h"
46+
47+
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H
48+

0 commit comments

Comments
 (0)