From 13ee748a079b1055a8739a16eed96c1b01c55de1 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Mon, 5 Dec 2011 14:52:21 +0700
Subject: [PATCH] Intel(R) MKL support added.

* * *
License disclaimer changed to BSD license for MKL_support.h.
* * *
PARDISO support fixed, test added. BLAS/LAPACK tests fixed: a Scalar parameter
was added in Cholesky, and product_matrix_vector_triangular was renamed to
triangular_matrix_vector_product.
* * *
The PARDISO test file itself was added.
---
 CMakeLists_mkl.txt                                 | 375 +++++++++++++++
 COPYING.BSD                                        |  26 ++
 Eigen/Cholesky                                     |   3 +
 Eigen/Core                                         |  15 +-
 Eigen/Eigenvalues                                  |   5 +
 Eigen/LU                                           |   3 +
 Eigen/PARDISOSupport                               |  30 ++
 Eigen/QR                                           |   4 +
 Eigen/SVD                                          |   3 +
 Eigen/src/Cholesky/LLT.h                           |  20 +-
 Eigen/src/Cholesky/LLT_MKL.h                       | 123 +++++
 Eigen/src/Core/Assign_MKL.h                        | 219 +++++++++
 .../products/GeneralMatrixMatrixTriangular.h       |  12 +-
 .../GeneralMatrixMatrixTriangular_MKL.h            | 144 ++++++
 .../Core/products/GeneralMatrixMatrix_MKL.h        | 116 +++++
 Eigen/src/Core/products/GeneralMatrixVector.h      |   8 +-
 .../Core/products/GeneralMatrixVector_MKL.h        | 129 ++++++
 Eigen/src/Core/products/Parallelizer.h             |   2 +-
 .../products/SelfadjointMatrixMatrix_MKL.h         | 293 ++++++++++++
 .../Core/products/SelfadjointMatrixVector.h        |  14 +-
 .../products/SelfadjointMatrixVector_MKL.h         | 112 +++++
 Eigen/src/Core/products/SelfadjointProduct.h       |   2 +-
 .../Core/products/TriangularMatrixMatrix.h         |  14 +-
 .../products/TriangularMatrixMatrix_MKL.h          | 307 +++++++++++++
 .../Core/products/TriangularMatrixVector.h         |  20 +-
 .../products/TriangularMatrixVector_MKL.h          | 247 ++++++++++
 .../products/TriangularSolverMatrix_MKL.h          | 153 +++++++
 Eigen/src/Core/util/BlasUtil.h                     |   2 +-
 Eigen/src/Core/util/MKL_support.h                  |  84 ++++
 Eigen/src/Eigenvalues/ComplexSchur_MKL.h           |  90 ++++
 Eigen/src/Eigenvalues/RealSchur_MKL.h              |  79 ++++
 .../Eigenvalues/SelfAdjointEigenSolver_MKL.h       |  89 ++++
 Eigen/src/LU/PartialPivLU_MKL.h                    |  80 ++++
 Eigen/src/PARDISOSupport/PARDISOSupport.h          | 427 ++++++++++++++++++
 Eigen/src/QR/ColPivHouseholderQR_MKL.h             |  94 ++++
 Eigen/src/QR/HouseholderQR_MKL.h                   |  65 +++
 Eigen/src/SVD/JacobiSVD_MKL.h                      |  88 ++++
 blas/level2_impl.h                                 |  24 +-
 lapack/cholesky.cpp                                |   4 +-
 test/CMakeLists.txt                                |   1 +
 test/pardiso_support.cpp                           |  26 ++
 41 files changed, 3494 insertions(+), 58 deletions(-)
 create mode 100644 CMakeLists_mkl.txt
 create mode 100644 COPYING.BSD
 create mode 100644 Eigen/PARDISOSupport
 create mode 100644 Eigen/src/Cholesky/LLT_MKL.h
 create mode 100644 Eigen/src/Core/Assign_MKL.h
 create mode 100644 Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
 create mode 100644 Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
 create mode 100644 Eigen/src/Core/products/GeneralMatrixVector_MKL.h
 create mode 100644 Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
 create mode 100644 Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
 create mode 100644 Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
 create mode 100644 Eigen/src/Core/products/TriangularMatrixVector_MKL.h
 create mode 100644 Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
 create mode 100644 Eigen/src/Core/util/MKL_support.h
 create mode 100644 Eigen/src/Eigenvalues/ComplexSchur_MKL.h
 create mode 100644 Eigen/src/Eigenvalues/RealSchur_MKL.h
 create mode 100644 Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
 create mode 100644 Eigen/src/LU/PartialPivLU_MKL.h
 create mode 100644 Eigen/src/PARDISOSupport/PARDISOSupport.h
 create mode 100644 Eigen/src/QR/ColPivHouseholderQR_MKL.h
 create mode 100644 Eigen/src/QR/HouseholderQR_MKL.h
 create mode 100644 Eigen/src/SVD/JacobiSVD_MKL.h
 create mode 100644
test/pardiso_support.cpp diff --git a/CMakeLists_mkl.txt b/CMakeLists_mkl.txt new file mode 100644 index 000000000..bc812088d --- /dev/null +++ b/CMakeLists_mkl.txt @@ -0,0 +1,375 @@ +project(Eigen) + +cmake_minimum_required(VERSION 2.6.2) + +# guard against in-source builds + +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") +endif() + +# guard against bad build-type strings + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower) +if( NOT cmake_build_type_tolower STREQUAL "debug" + AND NOT cmake_build_type_tolower STREQUAL "release" + AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") + message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).") +endif() + + +############################################################################# +# retrieve version infomation # +############################################################################# + +# automatically parse the version number +file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) +string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") +set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}") +string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") +set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}") +string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") +set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}") +set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) + +# if the mercurial program is absent, this will leave the EIGEN_HG_CHANGESET string empty, +# but won't stop CMake. +execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT) +execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT) + +# if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output... 
+if(EIGEN_BRANCH_OUTPUT MATCHES "default") +string(REGEX MATCH "^changeset: *[0-9]*:([0-9;a-f]+).*" EIGEN_HG_CHANGESET_MATCH "${EIGEN_HGTIP_OUTPUT}") +set(EIGEN_HG_CHANGESET "${CMAKE_MATCH_1}") +endif(EIGEN_BRANCH_OUTPUT MATCHES "default") +#...and show it next to the version number +if(EIGEN_HG_CHANGESET) + set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (mercurial changeset ${EIGEN_HG_CHANGESET})") +else(EIGEN_HG_CHANGESET) + set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}") +endif(EIGEN_HG_CHANGESET) + + +include(CheckCXXCompilerFlag) + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + +############################################################################# +# find how to link to the standard libraries # +############################################################################# + +find_package(StandardMathLibrary) + +set(MKLROOT "/nfs/ins/home/karturov/mkl/MKLQA/mkl_release/mkl1038_20111114/__release_lnx/mkl") + +set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO ${MKLROOT}/lib/intel64/libmkl_intel_lp64.so ${MKLROOT}/lib/intel64/libmkl_intel_thread.so ${MKLROOT}/lib/intel64/libmkl_core.so /nfs/ins/home/karturov/mkl/mirror/NS/MKLQA/tools/ess/l_compxe_p_12.1/32e/lib/libiomp5.so libpthread.so) + + +if(NOT STANDARD_MATH_LIBRARY_FOUND) + + message(FATAL_ERROR + "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.") + +else() + +# if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) +# set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") +# else() +# set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") +# endif() + +endif() + +if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}") +else() + message(STATUS "Standard libraries to link to explicitly: none") +endif() + +option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) +if(NOT WIN32) + option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) +endif(NOT WIN32) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON) + +option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF) +if(EIGEN_DEFAULT_TO_ROW_MAJOR) + add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR") +endif() + +add_definitions("-DEIGEN_MKL") + +add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS") + +set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320") + +if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing") + set(CMAKE_CXX_FLAGS_DEBUG "-g3") + set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2") + + check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO) + if(COMPILER_SUPPORT_WNOVARIADICMACRO) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros") + endif() + + check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA) + if(COMPILER_SUPPORT_WEXTRA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") + endif() + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic") + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") + message(STATUS "Enabling SSE2 in tests/examples") + endif() + + option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 
in tests/examples" OFF) + if(EIGEN_TEST_SSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") + message(STATUS "Enabling SSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF) + if(EIGEN_TEST_SSSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3") + message(STATUS "Enabling SSSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") + message(STATUS "Enabling SSE4.1 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + message(STATUS "Enabling SSE4.2 in tests/examples") + endif() + + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) + if(EIGEN_TEST_ALTIVEC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") + message(STATUS "Enabling AltiVec in tests/examples") + endif() + + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8") + message(STATUS "Enabling NEON in tests/examples") + endif() + + check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + +endif(CMAKE_COMPILER_IS_GNUCXX) + +if(MSVC) + # C4127 - conditional expression is constant + # C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively) + # We can disable this warning in the unit tests since it is clear that it occurs + # because we are oftentimes returning objects that have a destructor or may + # throw exceptions - in particular in the unit tests we are throwing extra many + # exceptions to cover indexing errors. + # C4505 - unreferenced local function has been removed (impossible to deactive selectively) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505 /wd4714") + + # replace all /Wx by /W4 + string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + check_cxx_compiler_flag("/openmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + if(NOT CMAKE_CL_64) + # arch is not supported on 64 bit systems, SSE is enabled automatically. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") + endif(NOT CMAKE_CL_64) + message(STATUS "Enabling SSE2 in tests/examples") + endif(EIGEN_TEST_SSE2) +endif(MSVC) + +option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) +option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF) +option(EIGEN_TEST_32BIT "Force generating 32bit code." 
OFF) + +if(EIGEN_TEST_X87) + set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387") + message(STATUS "Forcing use of x87 instructions in tests/examples") + else() + message(STATUS "EIGEN_TEST_X87 ignored on your compiler") + endif() +endif() + +if(EIGEN_TEST_32BIT) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") + message(STATUS "Forcing generation of 32-bit code in tests/examples") + else() + message(STATUS "EIGEN_TEST_32BIT ignored on your compiler") + endif() +endif() + +if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + add_definitions(-DEIGEN_DONT_VECTORIZE=1) + message(STATUS "Disabling vectorization in tests/examples") +endif() + +option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF) +if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + add_definitions(-DEIGEN_DONT_ALIGN=1) + message(STATUS "Disabling alignment in tests/examples") +endif() + +option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} "${MKLROOT}/include") + +# the user modifiable install path for header files +set(EIGEN_INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory where we install the header files (optional)") + +# set the internal install path for header files which depends on wether the user modifiable +# EIGEN_INCLUDE_INSTALL_DIR has been set by the user or not. +if(EIGEN_INCLUDE_INSTALL_DIR) + set(INCLUDE_INSTALL_DIR + ${EIGEN_INCLUDE_INSTALL_DIR} + CACHE INTERNAL + "The directory where we install the header files (internal)" + ) +else() + set(INCLUDE_INSTALL_DIR + "${CMAKE_INSTALL_PREFIX}/include/eigen3" + CACHE INTERNAL + "The directory where we install the header files (internal)" + ) +endif() + +# similar to set_target_properties but append the property instead of overwriting it +macro(ei_add_target_property target prop value) + + get_target_property(previous ${target} ${prop}) + # if the property wasn't previously set, ${previous} is now "previous-NOTFOUND" which cmake allows catching with plain if() + if(NOT previous) + set(previous "") + endif(NOT previous) + set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}") +endmacro(ei_add_target_property) + +install(FILES + signature_of_eigen3_matrix_library + DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel + ) + +if(EIGEN_BUILD_PKGCONFIG) + SET(path_separator ":") + STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}") + message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib") + FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search}) + if(pkg_config_libdir) + SET(pkg_config_install_dir ${pkg_config_libdir}) + message(STATUS "found ${pkg_config_libdir}/pkgconfig" ) + else(pkg_config_libdir) + SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share) + message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" ) + endif(pkg_config_libdir) + + configure_file(eigen3.pc.in eigen3.pc) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc + DESTINATION ${pkg_config_install_dir}/pkgconfig + ) +endif(EIGEN_BUILD_PKGCONFIG) + +add_subdirectory(Eigen) + +add_subdirectory(doc EXCLUDE_FROM_ALL) + +include(EigenConfigureTesting) +# fixme, not 
sure this line is still needed: +enable_testing() # must be called from the root CMakeLists, see man page + + +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest +else() + add_subdirectory(test EXCLUDE_FROM_ALL) +endif() + +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(blas) + add_subdirectory(lapack) +else() + add_subdirectory(blas EXCLUDE_FROM_ALL) + add_subdirectory(lapack EXCLUDE_FROM_ALL) +endif() + +add_subdirectory(unsupported) + +add_subdirectory(demos EXCLUDE_FROM_ALL) + +# must be after test and unsupported, for configuring buildtests.in +add_subdirectory(scripts EXCLUDE_FROM_ALL) + +# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"? +if(EIGEN_BUILD_BTL) + add_subdirectory(bench/btl EXCLUDE_FROM_ALL) +endif(EIGEN_BUILD_BTL) + +ei_testing_print_summary() + +message(STATUS "") +message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") +message(STATUS "") + +option(EIGEN_FAILTEST "Enable failtests." OFF) +if(EIGEN_FAILTEST) + add_subdirectory(failtest) +endif() + +string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) +if(cmake_generator_tolower MATCHES "makefile") + message(STATUS "Some things you can do now:") + message(STATUS "--------------+--------------------------------------------------------------") + message(STATUS "Command | Description") + message(STATUS "--------------+--------------------------------------------------------------") + message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:") + message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath") + message(STATUS " | Eigen headers will then be installed to:") + message(STATUS " | ${INCLUDE_INSTALL_DIR}") + message(STATUS " | To install Eigen headers to a separate location, do:") + message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath") + message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX") + message(STATUS "make check | Build and run the unit-tests. Read this page:") + message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") + message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)") + message(STATUS "--------------+--------------------------------------------------------------") +else() + message(STATUS "To build/run the unit tests, read this page:") + message(STATUS " http://eigen.tuxfamily.org/index.php?title=Tests") +endif() + +message(STATUS "") diff --git a/COPYING.BSD b/COPYING.BSD new file mode 100644 index 000000000..11971ffe2 --- /dev/null +++ b/COPYING.BSD @@ -0,0 +1,26 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ \ No newline at end of file diff --git a/Eigen/Cholesky b/Eigen/Cholesky index 53f7bf911..7b700c700 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -24,6 +24,9 @@ namespace Eigen { #include "src/misc/Solve.h" #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" +#ifdef EIGEN_MKL +#include "src/Cholesky/LLT_MKL.h" +#endif } // namespace Eigen diff --git a/Eigen/Core b/Eigen/Core index 239ee705b..ea2b6ec51 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -336,6 +336,16 @@ using std::ptrdiff_t; #include "src/Core/products/TriangularMatrixVector.h" #include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h" +#ifdef EIGEN_MKL +#include "src/Core/products/GeneralMatrixMatrix_MKL.h" +#include "src/Core/products/GeneralMatrixVector_MKL.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h" +#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h" +#include "src/Core/products/SelfadjointMatrixVector_MKL.h" +#include "src/Core/products/TriangularMatrixMatrix_MKL.h" +#include "src/Core/products/TriangularMatrixVector_MKL.h" +#include "src/Core/products/TriangularSolverMatrix_MKL.h" +#endif #include "src/Core/products/TriangularSolverVector.h" #include "src/Core/BandMatrix.h" @@ -352,7 +362,10 @@ using std::ptrdiff_t; #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" -#endif +#endif +#ifdef EIGEN_MKL +#include "src/Core/Assign_MKL.h" +#endif } // namespace Eigen diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index bfcd83dd3..d6de5ab2a 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -36,6 +36,11 @@ namespace Eigen { #include "src/Eigenvalues/ComplexSchur.h" #include "src/Eigenvalues/ComplexEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" +#ifdef EIGEN_MKL +#include "src/Eigenvalues/RealSchur_MKL.h" +#include "src/Eigenvalues/ComplexSchur_MKL.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h" +#endif } // namespace Eigen diff --git a/Eigen/LU b/Eigen/LU index 226f88ca3..da38c2238 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -23,6 +23,9 @@ namespace Eigen { #include "src/misc/Image.h" #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" +#ifdef EIGEN_MKL +#include "src/LU/PartialPivLU_MKL.h" +#endif #include "src/LU/Determinant.h" #include "src/LU/Inverse.h" diff --git a/Eigen/PARDISOSupport b/Eigen/PARDISOSupport new file mode 100644 index 000000000..a8a07d76d --- /dev/null +++ b/Eigen/PARDISOSupport @@ -0,0 +1,30 @@ +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#define EIGEN_PARDISOSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +#include + +namespace Eigen { + +/** \ingroup Sparse_modules + * \defgroup 
PARDISOSupport_Module Intel(R) MKL PARDISO support + * + * + * + * \code + * #include + * \endcode + */ + +#include "src/PARDISOSupport/PARDISOSupport.h" + +} // namespace Eigen + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/Eigen/QR b/Eigen/QR index 97c1788ee..e5c947b49 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -28,6 +28,10 @@ namespace Eigen { #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" +#ifdef EIGEN_MKL +#include "src/QR/HouseholderQR_MKL.h" +#include "src/QR/ColPivHouseholderQR_MKL.h" +#endif #ifdef EIGEN2_SUPPORT #include "src/Eigen2Support/QR.h" diff --git a/Eigen/SVD b/Eigen/SVD index d24471fd7..5b41d4e83 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -24,6 +24,9 @@ namespace Eigen { #include "src/misc/Solve.h" #include "src/SVD/JacobiSVD.h" +#ifdef EIGEN_MKL +#include "src/SVD/JacobiSVD_MKL.h" +#endif #include "src/SVD/UpperBidiagonalization.h" #ifdef EIGEN2_SUPPORT diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index f061cbf42..aef7ae263 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -196,15 +196,15 @@ template class LLT namespace internal { -template struct llt_inplace; +template struct llt_inplace; -template<> struct llt_inplace +template struct llt_inplace { template static typename MatrixType::Index unblocked(MatrixType& mat) { typedef typename MatrixType::Index Index; - typedef typename MatrixType::Scalar Scalar; +// typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; eigen_assert(mat.rows()==mat.cols()); @@ -291,25 +291,25 @@ template<> struct llt_inplace } }; -template<> struct llt_inplace +template struct llt_inplace { template static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat) { Transpose matt(mat); - return llt_inplace::unblocked(matt); + return llt_inplace::unblocked(matt); } template static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat) { Transpose matt(mat); - return llt_inplace::blocked(matt); + return llt_inplace::blocked(matt); } template static void rankUpdate(MatrixType& mat, const VectorType& vec) { Transpose matt(mat); - return llt_inplace::rankUpdate(matt, vec.conjugate()); + return llt_inplace::rankUpdate(matt, vec.conjugate()); } }; @@ -320,7 +320,7 @@ template struct LLT_Traits inline static MatrixL getL(const MatrixType& m) { return m; } inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); } static bool inplace_decomposition(MatrixType& m) - { return llt_inplace::blocked(m)==-1; } + { return llt_inplace::blocked(m)==-1; } }; template struct LLT_Traits @@ -330,7 +330,7 @@ template struct LLT_Traits inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); } inline static MatrixU getU(const MatrixType& m) { return m; } static bool inplace_decomposition(MatrixType& m) - { return llt_inplace::blocked(m)==-1; } + { return llt_inplace::blocked(m)==-1; } }; } // end namespace internal @@ -368,7 +368,7 @@ template void LLT::rankUpdate(const VectorType& v) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); - internal::llt_inplace::rankUpdate(m_matrix,v); + internal::llt_inplace::rankUpdate(m_matrix,v); } namespace internal { diff --git a/Eigen/src/Cholesky/LLT_MKL.h b/Eigen/src/Cholesky/LLT_MKL.h new file mode 100644 index 000000000..c5ed77d33 --- /dev/null +++ b/Eigen/src/Cholesky/LLT_MKL.h @@ -0,0 +1,123 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. 
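For context, a minimal sketch of how the PARDISOSupport module introduced above is meant to be used through Eigen's sparse-solver interface. The solver class name (PardisoLU) is an assumption based on the API Eigen later shipped; the hunk of PARDISOSupport.h that defines it is not reproduced in this excerpt, so treat this as an illustration rather than the patch's exact interface.

    // Usage sketch only: PardisoLU is an assumed class name, not confirmed by the hunks shown here.
    #include <Eigen/Sparse>
    #include <Eigen/PARDISOSupport>

    int solveWithPardiso(const Eigen::SparseMatrix<double>& A,
                         const Eigen::VectorXd& b,
                         Eigen::VectorXd& x)
    {
      Eigen::PardisoLU<Eigen::SparseMatrix<double> > solver;  // MKL PARDISO backend
      solver.compute(A);                      // analysis + numerical factorization
      if (solver.info() != Eigen::Success)
        return -1;                            // factorization failed
      x = solver.solve(b);                    // forward/backward solves inside PARDISO
      return solver.info() == Eigen::Success ? 0 : -1;
    }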
+ + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * LLt decomposition based on LAPACKE_?potrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_LLT_MKL_H +#define EIGEN_LLT_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" +#include + +namespace internal { + +template struct mkl_llt; + +#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template<> struct mkl_llt \ +{ \ + template \ + static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ + { \ + lapack_int matrix_order; \ + lapack_int size, lda, info, StorageOrder; \ + EIGTYPE* a; \ + eigen_assert(m.rows()==m.cols()); \ +/* Set up parameters for ?potrf */ \ + size = m.rows(); \ + StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + a = &(m.coeffRef(0,0)); \ + lda = m.outerStride(); \ +\ + info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ + info = (info==0) ? 
Success : NumericalIssue; \ + return info; \ + } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static typename MatrixType::Index blocked(MatrixType& m) \ + { \ + return mkl_llt::potrf(m, 'L'); \ + } \ + template \ + static void rankUpdate(MatrixType& mat, const VectorType& vec) \ + { \ + typedef typename MatrixType::ColXpr ColXpr; \ + typedef typename internal::remove_all::type ColXprCleaned; \ + typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \ + typedef typename MatrixType::Scalar Scalar; \ + typedef Matrix TempVectorType; \ + typedef typename TempVectorType::SegmentReturnType TempVecSegment; \ +\ + int n = mat.cols(); \ + eigen_assert(mat.rows()==n && vec.size()==n); \ + TempVectorType temp(vec); \ +\ + for(int i=0; i g; \ + g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \ +\ + int rs = n-i-1; \ + if(rs>0) \ + { \ + ColXprSegment x(mat.col(i).tail(rs)); \ + TempVecSegment y(temp.tail(rs)); \ + apply_rotation_in_the_plane(x, y, g); \ + } \ + } \ + } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static typename MatrixType::Index blocked(MatrixType& m) \ + { \ + return mkl_llt::potrf(m, 'U'); \ + } \ + template \ + static void rankUpdate(MatrixType& mat, const VectorType& vec) \ + { \ + Transpose matt(mat); \ + return llt_inplace::rankUpdate(matt, vec.conjugate()); \ + } \ +}; + +EIGEN_MKL_LLT(double, double, d) +EIGEN_MKL_LLT(float, float, s) +EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) +EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) + +} + +#endif // EIGEN_LLT_MKL_H diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h new file mode 100644 index 000000000..141015f1b --- /dev/null +++ b/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,219 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
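For orientation, the user-facing path that the LLT_MKL.h bindings above accelerate: with EIGEN_MKL defined, LLT<MatrixType>::compute() reaches the llt_inplace<Scalar, Lower/Upper> specializations, which call LAPACKE_?potrf, and rankUpdate() uses the Givens-based update defined in the same macro. A minimal sketch using only the public API visible in the LLT.h hunk:

    #include <Eigen/Dense>   // built with -DEIGEN_MKL so LLT maps to LAPACKE_?potrf

    void lltExample()
    {
      const int n = 512;
      Eigen::MatrixXd S = Eigen::MatrixXd::Random(n, n);
      Eigen::MatrixXd A = S * S.transpose()
                        + n * Eigen::MatrixXd::Identity(n, n);  // make it SPD
      Eigen::VectorXd b = Eigen::VectorXd::Random(n);

      Eigen::LLT<Eigen::MatrixXd> llt(A);  // blocked() -> mkl_llt<double>::potrf(m, 'L')
      Eigen::VectorXd x = llt.solve(b);    // triangular solves on the MKL factor
      llt.rankUpdate(b);                   // Givens-based update from LLT_MKL.h
    }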
+ + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +template struct vml_call +{ enum { IsSupported = 0 }; }; + +template +class vml_assign_traits +{ + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess + && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, + MayEnableVml = MightEnableVml && LargeEnough, + MayLinearize = MayEnableVml && MightLinearize + }; + public: + enum { + Traversal = MayLinearize ? LinearVectorizedTraversal + : MayEnableVml ? InnerVectorizedTraversal + : DefaultTraversal + }; +}; + +template::Traversal > +struct vml_assign_impl + : assign_impl,Traversal,Unrolling,BuiltIn> +{ +}; + +template +struct vml_assign_impl +{ + typedef typename Derived1::Scalar Scalar; + typedef typename Derived1::Index Index; + inline static void run(Derived1& dst, const CwiseUnaryOp& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) { + const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : + &(src.nestedExpression().coeffRef(0, outer)); + Scalar *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); + vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); + } + } +}; + +template +struct vml_assign_impl +{ + inline static void run(Derived1& dst, const CwiseUnaryOp& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); + vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); + } +}; + +// Macroses + +#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ + template \ + struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ + inline static void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ + vml_assign_impl::run(dst, src); \ + } \ + }; + +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) + + +#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_MKL_VML_MODE VML_HA +#else +#define EIGEN_MKL_VML_MODE VML_LA +#endif + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + EIGENTYPE exponent = func.m_exponent; \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ + (VMLTYPE*)dst, &vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) + + +#define 
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) +//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) + +} // end namespace internal + +#endif // EIGEN_ASSIGN_VML_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 5043b64fe..0ed65f9b4 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -42,14 +42,14 @@ struct tribb_kernel; template + int ResStorageOrder, int UpLo, int Version = Specialized> struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template -struct general_matrix_matrix_triangular_product -{ + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product +{ typedef typename scalar_product_traits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) @@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product -struct general_matrix_matrix_triangular_product + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product { typedef typename scalar_product_traits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h new file mode 100644 index 000000000..d86bc0542 --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h @@ -0,0 +1,144 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Level 3 BLAS SYRK/HERK implementation. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +template +struct general_matrix_matrix_rankupdate : + general_matrix_matrix_triangular_product< + Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ +template \ +struct general_matrix_matrix_triangular_product { \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ + { \ + if (lhs==rhs) { \ + general_matrix_matrix_rankupdate \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } else { \ + general_matrix_matrix_triangular_product \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } \ + } \ +}; + +EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) +EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) + +// SYRK for float/double +#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ +template \ +struct general_matrix_matrix_rankupdate { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + /* typedef Matrix MatrixRhs;*/ \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 
'T':'N'; \ + MKLTYPE alpha_, beta_; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ + } \ +}; + +// HERK for complex data +#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ +template \ +struct general_matrix_matrix_rankupdate { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + typedef Matrix MatrixType; \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ + RTYPE alpha_, beta_; \ + const EIGTYPE* a_ptr; \ +\ +/* Set alpha_ & beta_ */ \ +/* assign_scalar_eig2mkl(alpha_, alpha); */\ +/* assign_scalar_eig2mkl(beta_, EIGTYPE(1));*/ \ + alpha_ = alpha.real(); \ + beta_ = 1.0; \ +/* Copy with conjugation in some cases*/ \ + MatrixType a; \ + if (conjA) { \ + Map > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ + a = mapA.conjugate(); \ + lda = a.outerStride(); \ + a_ptr = a.data(); \ + } else a_ptr=lhs; \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + + +EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) +EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) + +//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) +//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) + + +} // end namespace internal + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h new file mode 100644 index 000000000..9ad589e66 --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-matrix product functionality based on ?GEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements general matrix-matrix multiplication using BLAS +* gemm function via partial specialization of +* general_matrix_matrix_product::run(..) method for float, double, +* std::complex and std::complex types +**********************************************************************/ + +// gemm specialization + +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ +template< \ + typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct general_matrix_matrix_product \ +{ \ +static void run(Index rows, Index cols, Index depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, \ + level3_blocking& blocking, \ + GemmParallelInfo* info = 0) \ +{ \ + using std::conj; \ +\ + char transa, transb; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX a_tmp, b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ + transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 
'C' : 'T') : 'N'; \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ + k = (MKL_INT)depth; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ + Map > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ +\ + if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ + Map > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +}}; + +GEMM_SPECIALIZATION(double, d, double, d) +GEMM_SPECIALIZATION(float, f, float, s) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) + +} //end of namespase + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e0e2cbf8f..1481c138d 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -40,8 +40,8 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul */ -template -struct general_matrix_vector_product +template +struct general_matrix_vector_product { typedef typename scalar_product_traits::ReturnType ResScalar; @@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run( * - alpha is always a complex (or converted to a complex) * - no vectorization */ -template -struct general_matrix_vector_product +template +struct general_matrix_vector_product { typedef typename scalar_product_traits::ReturnType ResScalar; diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h new file mode 100644 index 000000000..20194c9fa --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -0,0 +1,129 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-vector product functionality based on ?GEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H +#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements general matrix-vector multiplication using BLAS +* gemv function via partial specialization of +* general_matrix_vector_product::run(..) method for float, double, +* std::complex and std::complex types +**********************************************************************/ + +// gemv specialization + +template +struct general_matrix_vector_product_gemv : + general_matrix_vector_product {}; + +#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ +template \ +struct general_matrix_vector_product { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + if (ConjugateLhs) { \ + general_matrix_vector_product::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } else { \ + general_matrix_vector_product_gemv::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } \ +} \ +}; \ +template \ +struct general_matrix_vector_product { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + general_matrix_vector_product_gemv::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ +} \ +}; \ + +EIGEN_MKL_GEMV_SPECIALIZE(double) +EIGEN_MKL_GEMV_SPECIALIZE(float) +EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) +EIGEN_MKL_GEMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ +template \ +struct general_matrix_vector_product_gemv \ +{ \ +typedef Matrix GEMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsIncr, \ + EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ +{ \ + MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 
'C' : 'T'; \ + if (LhsStorageOrder==RowMajor) { \ + m=cols; \ + n=rows; \ + }\ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + GEMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map > map_x(rhs,cols,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=rhs; \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) +EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) +EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) +EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) + +} //end of namespase + +#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index ecdedc363..d002f6d2e 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -85,7 +85,7 @@ template struct GemmParallelInfo template void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) { -#ifndef EIGEN_HAS_OPENMP +#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_MKL) // FIXME the transpose variable is only needed to properly split // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices. This whole diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h new file mode 100644 index 000000000..338a57a55 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h @@ -0,0 +1,293 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. 
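+ *
+ *   Usage sketch (a minimal illustration, not part of the bindings themselves;
+ *   it assumes the MKL product bindings are enabled in the build and the sizes
+ *   are arbitrary). An expression of this shape is what gets forwarded to
+ *   ?symm (real) or ?hemm (complex):
+ *
+ *     Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 512);  // read as self-adjoint
+ *     Eigen::MatrixXd B = Eigen::MatrixXd::Random(512, 128);
+ *     Eigen::MatrixXd C = A.selfadjointView<Eigen::Lower>() * B;   // -> dsymm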
+ ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + + +/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ + a = _lhs; \ +\ + if (RhsStorageOrder==RowMajor) { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ + Map, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ + b = _rhs; } \ + else { \ + if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ + Map > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + } else \ + if (ConjugateRhs) { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + } else { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + +EIGEN_MKL_SYMM_L(double, double, d, d) +EIGEN_MKL_SYMM_L(float, float, f, s) +EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) 
+EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) + + +/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ + a = _rhs; \ +\ + if (LhsStorageOrder==RowMajor) { \ + Map > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = lhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _lhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ + Map, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ + a_tmp = rhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _rhs; \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ + b = _lhs; } \ + else { \ + if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ + Map > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ + b_tmp = lhs.conjugate(); \ + } else \ + if (ConjugateLhs) { \ + Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.adjoint(); \ + } else { \ + Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + +EIGEN_MKL_SYMM_R(double, double, d, d) +EIGEN_MKL_SYMM_R(float, float, f, s) +EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 
d6121fc07..14419e168 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -32,8 +32,15 @@ namespace internal { * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ -template -static EIGEN_DONT_INLINE void product_selfadjoint_vector( + +template +struct selfadjoint_matrix_vector_product; + +template +struct selfadjoint_matrix_vector_product + +{ +static EIGEN_DONT_INLINE void run( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, @@ -159,6 +166,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( res[j] += alpha * t2; } } +}; } // end namespace internal @@ -232,7 +240,7 @@ struct SelfadjointProductMatrix } - internal::product_selfadjoint_vector::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> + internal::selfadjoint_matrix_vector_product::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h new file mode 100644 index 000000000..37d20d581 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -0,0 +1,112 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. 
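+ *
+ *   Usage sketch (a minimal illustration; it assumes the MKL product bindings
+ *   are enabled in the build and the sizes are arbitrary). A self-adjoint
+ *   matrix-vector product of this shape is intercepted and forwarded to
+ *   ?symv (real) or ?hemv (complex):
+ *
+ *     Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(256, 256);
+ *     Eigen::VectorXcd x = Eigen::VectorXcd::Random(256);
+ *     Eigen::VectorXcd y = A.selfadjointView<Eigen::Upper>() * x;   // -> zhemv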
+ ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements selfadjoint matrix-vector multiplication using BLAS +**********************************************************************/ + +// symv/hemv specialization + +template +struct selfadjoint_matrix_vector_product_symv : + selfadjoint_matrix_vector_product {}; + +#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ +template \ +struct selfadjoint_matrix_vector_product { \ +static EIGEN_DONT_INLINE void run( \ + Index size, const Scalar* lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + enum {\ + IsColMajor = StorageOrder==ColMajor \ + }; \ + if (IsColMajor == ConjugateLhs) {\ + selfadjoint_matrix_vector_product::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + } else {\ + selfadjoint_matrix_vector_product_symv::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + }\ + } \ +}; \ + +EIGEN_MKL_SYMV_SPECIALIZE(double) +EIGEN_MKL_SYMV_SPECIALIZE(float) +EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) +EIGEN_MKL_SYMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ +template \ +struct selfadjoint_matrix_vector_product_symv \ +{ \ +typedef Matrix SYMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ +Index size, const EIGTYPE* lhs, Index lhsStride, \ +const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +{ \ + enum {\ + IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ + IsLower = UpLo == Lower ? 1 : 0, \ + }; \ + MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + SYMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=_rhs; \ + MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) + +} //end of namespase + +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 3a4523fa4..1def96719 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -110,7 +110,7 @@ struct selfadjoint_product_selector Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0 }; - + internal::general_matrix_matrix_triangular_product::IsComplex, Scalar, _ActualOtherType::Flags&RowMajorBit ? 
ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits::IsComplex, diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index e31727eb2..7632ba85a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -58,16 +58,16 @@ template + int ResStorageOrder, int Version = Specialized> struct product_triangular_matrix_matrix; template + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,RowMajor,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, @@ -91,10 +91,10 @@ struct product_triangular_matrix_matrix + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits Traits; @@ -220,10 +220,10 @@ struct product_triangular_matrix_matrix + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits Traits; enum { diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h new file mode 100644 index 000000000..5c0a07739 --- /dev/null +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -0,0 +1,307 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. 
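+ *
+ *   Usage sketch (a minimal illustration; it assumes the MKL product bindings
+ *   are enabled in the build). A product with a square triangular factor, as
+ *   below, maps directly to ?trmm; the non-square case falls back to Eigen's
+ *   kernel or to ?gemm, as implemented further down in this file:
+ *
+ *     Eigen::MatrixXd L = Eigen::MatrixXd::Random(300, 300);
+ *     Eigen::MatrixXd B = Eigen::MatrixXd::Random(300, 64);
+ *     Eigen::MatrixXd C = L.triangularView<Eigen::Lower>() * B;   // -> dtrmm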
+ ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + + +template +struct product_triangular_matrix_matrix_trmm : + product_triangular_matrix_matrix {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ +template \ +struct product_triangular_matrix_matrix { \ + inline static void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \ + product_triangular_matrix_matrix_trmm::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + } \ +}; + +EIGEN_MKL_TRMM_SPECIALIZE(double, true) +EIGEN_MKL_TRMM_SPECIALIZE(double, false) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) +EIGEN_MKL_TRMM_SPECIALIZE(float, true) +EIGEN_MKL_TRMM_SPECIALIZE(float, false) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) + +// implements col-major += alpha * op(triangular) * op(general) +#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_triangular_matrix_matrix_trmm \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_rows,_depth); \ + Index rows = IsLower ? _rows : diagSize; \ + Index depth = IsLower ? diagSize : _depth; \ + Index cols = _cols; \ +\ + typedef Matrix MatrixLhs; \ + typedef Matrix MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (rows != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs aa_tmp=lhsMap.template triangularView(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product::run( \ + rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ +\ + /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'L', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)diagSize; \ + n = (MKL_INT)cols; \ +\ +/* Set trans */ \ + transa = (LhsStorageOrder==RowMajor) ? 
((ConjugateLhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _lhs; \ + lda = lhsStride; \ + } \ + /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_L(double, double, d, d) +EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_L(float, float, f, s) +EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) + +// implements col-major += alpha * op(general) * op(triangular) +#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_triangular_matrix_matrix_trmm \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_cols,_depth); \ + Index rows = _rows; \ + Index depth = IsLower ? _depth : diagSize; \ + Index cols = IsLower ? diagSize : _cols; \ +\ + typedef Matrix MatrixLhs; \ + typedef Matrix MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (cols != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs aa_tmp=rhsMap.template triangularView(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product::run( \ + rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \ +\ + /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! 
" << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'R', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)diagSize; \ +\ +/* Set trans */ \ + transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _rhs; \ + lda = rhsStride; \ + } \ + /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_R(double, double, d, d) +EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_R(float, float, f, s) +EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 7bfff504c..ebda994f6 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -27,11 +27,11 @@ namespace internal { -template -struct product_triangular_matrix_vector; +template +struct triangular_matrix_vector_product; -template -struct product_triangular_matrix_vector +template +struct triangular_matrix_vector_product { typedef typename scalar_product_traits::ReturnType ResScalar; enum { @@ -75,7 +75,7 @@ struct product_triangular_matrix_vector0) { Index s = IsLower ? pi+actualPanelWidth : 0; - general_matrix_vector_product::run( + general_matrix_vector_product::run( r, actualPanelWidth, &lhs.coeffRef(s,pi), lhsStride, &rhs.coeffRef(pi), rhsIncr, @@ -93,8 +93,8 @@ struct product_triangular_matrix_vector -struct product_triangular_matrix_vector +template +struct triangular_matrix_vector_product { typedef typename scalar_product_traits::ReturnType ResScalar; enum { @@ -138,7 +138,7 @@ struct product_triangular_matrix_vector0) { Index s = IsLower ? 
0 : pi + actualPanelWidth; - general_matrix_vector_product::run( + general_matrix_vector_product::run( actualPanelWidth, r, &lhs.coeffRef(pi,s), lhsStride, &rhs.coeffRef(s), rhsIncr, @@ -271,7 +271,7 @@ template<> struct trmv_selector MappedDest(actualDestPtr, dest.size()) = dest; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product struct trmv_selector Map(actualRhsPtr, actualRhs.size()) = actualRhs; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product +struct triangular_matrix_vector_product_trmv : + triangular_matrix_vector_product {}; + +#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ +template \ +struct triangular_matrix_vector_product { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; \ +template \ +struct triangular_matrix_vector_product { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; + +EIGEN_MKL_TRMV_SPECIALIZE(double) +EIGEN_MKL_TRMV_SPECIALIZE(float) +EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) +EIGEN_MKL_TRMV_SPECIALIZE(scomplex) + +// implements col-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct triangular_matrix_vector_product_trmv { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (ConjLhs || IsZeroDiag) { \ + triangular_matrix_vector_product::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, k, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = 'N'; \ + uplo = IsLower ? 'L' : 'U'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + std::cout << "TRMV: CM\n";\ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. 
Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size \ +struct triangular_matrix_vector_product_trmv { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (IsZeroDiag) { \ + triangular_matrix_vector_product::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, k, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = ConjLhs ? 'C' : 'T'; \ + uplo = IsLower ? 'U' : 'L'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + std::cout << "TRMV: RM\n";\ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size \ +struct triangular_solve_matrix \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride) \ + { \ + MKL_INT m = size, n = otherSize, lda, ldb; \ + char side = 'L', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 
'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix MatrixTri; \ + Map > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + } \ +}; + +EIGEN_MKL_TRSM_L(double, double, d) +EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_L(float, float, s) +EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) + + +// implements RightSide general * op(triangular)^-1 +#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template \ +struct triangular_solve_matrix \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride) \ + { \ + MKL_INT m = otherSize, n = size, lda, ldb; \ + char side = 'R', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix MatrixTri; \ + Map > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + /*std::cout << "TRMS_L specialization!\n";*/ \ + } \ +}; + +EIGEN_MKL_TRSM_R(double, double, d) +EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_R(float, float, s) +EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) + + +} // end namespace internal + +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index f1d93d2f8..860d72d91 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -47,7 +47,7 @@ template< int ResStorageOrder> struct general_matrix_matrix_product; -template +template struct general_matrix_vector_product; diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h new file mode 100644 index 000000000..6d78e97b1 --- /dev/null +++ b/Eigen/src/Core/util/MKL_support.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Include file with common MKL declarations + ******************************************************************************** +*/ + +#ifndef EIGEN_MKL_SUPPORT_H +#define EIGEN_MKL_SUPPORT_H + +#include +#include +#include + +#define EIGEN_MKL_VML_THRESHOLD 128 + +typedef std::complex dcomplex; +typedef std::complex scomplex; + +namespace internal { + +template +static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { + mklScalar=eigenScalar; +} + +template <> +inline void assign_scalar_eig2mkl(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=eigenScalar.imag(); +} + +template <> +inline void assign_scalar_eig2mkl(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=eigenScalar.imag(); +} + +template +static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { + mklScalar=eigenScalar; +} + +template <> +inline void assign_conj_scalar_eig2mkl(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=-eigenScalar.imag(); +} + +template <> +inline void assign_conj_scalar_eig2mkl(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=-eigenScalar.imag(); +} + +} // end namespace internal + + +#endif \ No newline at end of file diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h new file mode 100644 index 000000000..42c9e8723 --- /dev/null +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -0,0 +1,90 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors. + ******************************************************************************** +*/ + +#ifndef EIGEN_COMPLEX_SCHUR_MKL_H +#define EIGEN_COMPLEX_SCHUR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ +template<> \ +ComplexSchur >& \ +ComplexSchur >::compute(const Matrix& matrix, bool computeU) \ +{ \ + typedef Matrix MatrixType; \ + typedef MatrixType::Scalar Scalar; \ + typedef MatrixType::RealScalar RealScalar; \ + typedef std::complex ComplexScalar; \ +\ + assert(matrix.cols() == matrix.rows()); \ +\ + m_matUisUptodate = false; \ + if(matrix.cols() == 1) \ + { \ + m_matT = matrix.cast(); \ + if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ + m_info = Success; \ + m_isInitialized = true; \ + m_matUisUptodate = computeU; \ + return *this; \ + } \ + lapack_int n = matrix.cols(), sdim, info; \ + lapack_int lda = matrix.outerStride(); \ + lapack_int matrix_order = MKLCOLROW; \ + char jobvs, sort='N'; \ + LAPACK_##MKLPREFIX_U##_SELECT1 select; \ + jobvs = (computeU) ? 'V' : 'N'; \ + m_matU.resize(n, n); \ + lapack_int ldvs = m_matU.outerStride(); \ + m_matT = matrix; \ + Matrix w; \ + w.resize(n, 1);\ + info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ + if(info == 0) \ + m_info = Success; \ + else \ + m_info = NoConvergence; \ +\ + m_isInitialized = true; \ + m_matUisUptodate = computeU; \ + return *this; \ +\ +} + +EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, RowMajor, LAPACK_ROW_MAJOR) + +#endif // EIGEN_COMPLEX_SCHUR_MKL_H diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h new file mode 100644 index 000000000..406426392 --- /dev/null +++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -0,0 +1,79 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Real Schur needed to real unsymmetrical eigenvalues/eigenvectors. + ******************************************************************************** +*/ + +#ifndef EIGEN_REAL_SCHUR_MKL_H +#define EIGEN_REAL_SCHUR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ +template<> \ +RealSchur >& \ +RealSchur >::compute(const Matrix& matrix, bool computeU) \ +{ \ + typedef Matrix MatrixType; \ + typedef MatrixType::Scalar Scalar; \ + typedef MatrixType::RealScalar RealScalar; \ +\ + assert(matrix.cols() == matrix.rows()); \ +\ + lapack_int n = matrix.cols(), sdim, info; \ + lapack_int lda = matrix.outerStride(); \ + lapack_int matrix_order = MKLCOLROW; \ + char jobvs, sort='N'; \ + LAPACK_##MKLPREFIX_U##_SELECT2 select; \ + jobvs = (computeU) ? 'V' : 'N'; \ + m_matU.resize(n, n); \ + lapack_int ldvs = m_matU.outerStride(); \ + m_matT = matrix; \ + Matrix wr, wi; \ + wr.resize(n, 1); wi.resize(n, 1); \ + info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ + if(info == 0) \ + m_info = Success; \ + else \ + m_info = NoConvergence; \ +\ + m_isInitialized = true; \ + m_matUisUptodate = computeU; \ + return *this; \ +\ +} + +EIGEN_MKL_SCHUR_REAL(double, double, d, D, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_SCHUR_REAL(float, float, s, S, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_SCHUR_REAL(double, double, d, D, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_MKL_SCHUR_REAL(float, float, s, S, RowMajor, LAPACK_ROW_MAJOR) + +#endif // EIGEN_REAL_SCHUR_MKL_H diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h new file mode 100644 index 000000000..27ef1689d --- /dev/null +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h @@ -0,0 +1,89 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Self-adjoint eigenvalues/eigenvectors. + ******************************************************************************** +*/ + +#ifndef EIGEN_SAEIGENSOLVER_MKL_H +#define EIGEN_SAEIGENSOLVER_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ +template<> \ +SelfAdjointEigenSolver >& \ +SelfAdjointEigenSolver >::compute(const Matrix& matrix, int options) \ +{ \ + eigen_assert(matrix.cols() == matrix.rows()); \ + eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ + && (options&EigVecMask)!=EigVecMask \ + && "invalid option parameter"); \ + bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ + lapack_int n = matrix.cols(), lda, matrix_order, info; \ + m_eivalues.resize(n,1); \ + m_subdiag.resize(n-1); \ + m_eivec = matrix; \ +\ + if(n==1) \ + { \ + m_eivalues.coeffRef(0,0) = internal::real(matrix.coeff(0,0)); \ + if(computeEigenvectors) m_eivec.setOnes(n,n); \ + m_info = Success; \ + m_isInitialized = true; \ + m_eigenvectorsOk = computeEigenvectors; \ + return *this; \ + } \ +\ + lda = matrix.outerStride(); \ + matrix_order=MKLCOLROW; \ + char jobz, uplo='L', range='A'; \ + jobz = computeEigenvectors ? 'V' : 'N'; \ +\ + info = LAPACKE_##MKLNAME( matrix_order, jobz, uplo, n, (MKLTYPE*)m_eivec.data(), lda, (MKLRTYPE*)m_eivalues.data() ); \ + m_info = (info==0) ? 
Success : NoConvergence; \ + m_isInitialized = true; \ + m_eigenvectorsOk = computeEigenvectors; \ + return *this; \ +} + + +EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, ColMajor, LAPACK_COL_MAJOR) + +EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, RowMajor, LAPACK_ROW_MAJOR) + + +#endif // EIGEN_SAEIGENSOLVER_H diff --git a/Eigen/src/LU/PartialPivLU_MKL.h b/Eigen/src/LU/PartialPivLU_MKL.h new file mode 100644 index 000000000..5cafd9012 --- /dev/null +++ b/Eigen/src/LU/PartialPivLU_MKL.h @@ -0,0 +1,80 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * LU decomposition with partial pivoting based on LAPACKE_?getrf function. 
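+ *
+ *   Usage sketch (a minimal illustration; it assumes the MKL/LAPACKE bindings
+ *   are enabled in the build and that A is invertible). The specialization
+ *   below is reached through the ordinary decomposition API:
+ *
+ *     Eigen::MatrixXd A = Eigen::MatrixXd::Random(400, 400);
+ *     Eigen::VectorXd b = Eigen::VectorXd::Random(400);
+ *     Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);   // factorization -> dgetrf
+ *     Eigen::VectorXd x = lu.solve(b);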
+ ******************************************************************************** +*/ + +#ifndef EIGEN_PARTIALLU_LAPACK_H +#define EIGEN_PARTIALLU_LAPACK_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template \ +struct partial_lu_impl \ +{ \ + /* \internal performs the LU decomposition in-place of the matrix represented */ \ + static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ + { \ + lapack_int matrix_order, first_zero_pivot; \ + lapack_int m, n, lda, *ipiv, info; \ + EIGTYPE* a; \ +/* Set up parameters for ?getrf */ \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + lda = luStride; \ + a = lu_data; \ + ipiv = row_transpositions; \ + m = rows; \ + n = cols; \ + nb_transpositions = 0; \ +\ + info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ +\ + for(int i=0;i= 0); \ +/* something should be done with nb_transpositions */ \ +\ + first_zero_pivot = info; \ + return first_zero_pivot; \ + } \ +}; + +EIGEN_MKL_LU_PARTPIV(double, double, d) +EIGEN_MKL_LU_PARTPIV(float, float, s) +EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) +EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) + +} // end namespace internal + +#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/Eigen/src/PARDISOSupport/PARDISOSupport.h b/Eigen/src/PARDISOSupport/PARDISOSupport.h new file mode 100644 index 000000000..d0d50c8f8 --- /dev/null +++ b/Eigen/src/PARDISOSupport/PARDISOSupport.h @@ -0,0 +1,427 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL PARDISO + ******************************************************************************** +*/ + +#ifndef EIGEN_PARDISOSUPPORT_H +#define EIGEN_PARDISOSUPPORT_H + +template +class PardisoLU; +template +class PardisoLLT; +template +class PardisoLDLT; + +namespace internal +{ + template + struct pardiso_run_selector + { + static Index run(_MKL_DSS_HANDLE_t pt, Index maxfct, Index mnum, Index type, Index phase, Index n, void *a, + Index *ia, Index *ja, Index *perm, Index nrhs, Index *iparm, Index msglvl, void *b, void *x) + { + Index error = 0; + ::pardiso(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error); + return error; + } + }; + template<> + struct pardiso_run_selector + { + typedef long long int Index; + static Index run(_MKL_DSS_HANDLE_t pt, Index maxfct, Index mnum, Index type, Index phase, Index n, void *a, + Index *ia, Index *ja, Index *perm, Index nrhs, Index *iparm, Index msglvl, void *b, void *x) + { + Index error = 0; + ::pardiso_64(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error); + return error; + } + }; + + template + struct pardiso_traits; + + template + struct pardiso_traits< PardisoLU<_MatrixType> > + { + typedef _MatrixType MatrixType; + typedef typename _MatrixType::Scalar Scalar; + typedef typename _MatrixType::RealScalar RealScalar; + typedef typename _MatrixType::Index Index; + }; + + template + struct pardiso_traits< PardisoLLT<_MatrixType> > + { + typedef _MatrixType MatrixType; + typedef typename _MatrixType::Scalar Scalar; + typedef typename _MatrixType::RealScalar RealScalar; + typedef typename _MatrixType::Index Index; + }; + + template + struct pardiso_traits< PardisoLDLT<_MatrixType> > + { + typedef _MatrixType MatrixType; + typedef typename _MatrixType::Scalar Scalar; + typedef typename _MatrixType::RealScalar RealScalar; + typedef typename _MatrixType::Index Index; + }; + +} + +template +class PardisoImpl +{ + public: + typedef typename internal::pardiso_traits::MatrixType MatrixType; + typedef typename internal::pardiso_traits::Scalar Scalar; + typedef typename internal::pardiso_traits::RealScalar RealScalar; + typedef typename internal::pardiso_traits::Index Index; + typedef Matrix VectorType; + typedef Matrix IntRowVectorType; + typedef Matrix IntColVectorType; + enum { + ScalarIsComplex = NumTraits::IsComplex + }; + + PardisoImpl(int flags) : m_flags(flags) + { + eigen_assert((sizeof(Index) >= sizeof(_INTEGER_t) && sizeof(Index) <= 8) && "Non-supported index type"); + memset(m_iparm, 0, sizeof(m_iparm)); + m_msglvl = 0; /* No output */ + m_initialized = false; + } + + ~PardisoImpl() + { + pardisoRelease(); + } + + inline Index cols() const { return m_matrix.cols(); } + inline Index rows() const { return m_matrix.rows(); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_initialized && "Decomposition is not initialized."); + return m_info; + } + + int orderingMethod() const + { + return m_flags&OrderingMask; + } + + Derived& compute(const MatrixType& matrix); + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
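+ *
+ * A minimal usage sketch (assuming a square SparseMatrix<double> A and a
+ * matching VectorXd b):
+ * \code
+ * PardisoLU<SparseMatrix<double> > solver;
+ * solver.compute(A);
+ * VectorXd x;
+ * if(solver.info()==Success)
+ *   x = solver.solve(b);
+ * \endcode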
+ * + * \sa compute() + */ + template + inline const internal::solve_retval + solve(const MatrixBase& b, const int transposed = SvNoTrans) const + { + eigen_assert(m_initialized && "SimplicialCholesky is not initialized."); + eigen_assert(rows()==b.rows() + && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived(), transposed); + } + + Derived& derived() + { + return *static_cast(this); + } + const Derived& derived() const + { + return *static_cast(this); + } + + template + bool _solve(const MatrixBase &b, MatrixBase& x, const int transposed = SvNoTrans) const; + + protected: + void pardisoRelease() + { + if(m_initialized) // Factorization ran at least once + { + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_matrix.rows(), NULL, NULL, NULL, m_perm.data(), 0, + m_iparm, m_msglvl, NULL, NULL); + memset(m_iparm, 0, sizeof(m_iparm)); + } + } + protected: + // cached data to reduce reallocation, etc. + + ComputationInfo m_info; + bool m_symmetric, m_initialized, m_succeeded; + int m_flags; + Index m_type, m_msglvl; + mutable void *m_pt[64]; + mutable Index m_iparm[64]; + mutable SparseMatrix m_matrix; + mutable IntColVectorType m_perm; +}; + +template +Derived& PardisoImpl::compute(const MatrixType& a) +{ + Index n = a.rows(), i; + eigen_assert(a.rows() == a.cols()); + + pardisoRelease(); + memset(m_pt, 0, sizeof(m_pt)); + m_initialized = true; + + m_symmetric = abs(m_type) < 10; + + switch (orderingMethod()) + { + case MinimumDegree_AT_PLUS_A : m_iparm[1] = 0; break; + case NaturalOrdering : m_iparm[5] = 1; break; + case Metis : m_iparm[1] = 3; break; + default: + //std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the PARDISO backend\n"; + m_iparm[1] = 0; + }; + + m_iparm[0] = 1; /* No solver default */ + /* Numbers of processors, value of OMP_NUM_THREADS */ + m_iparm[2] = 1; + m_iparm[3] = 0; /* No iterative-direct algorithm */ + m_iparm[4] = 0; /* No user fill-in reducing permutation */ + m_iparm[5] = 0; /* Write solution into x */ + m_iparm[6] = 0; /* Not in use */ + m_iparm[7] = 2; /* Max numbers of iterative refinement steps */ + m_iparm[8] = 0; /* Not in use */ + m_iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ + m_iparm[10] = m_symmetric ? 0 : 1; /* Use nonsymmetric permutation and scaling MPS */ + m_iparm[11] = 0; /* Not in use */ + m_iparm[12] = m_symmetric ? 0 : 1; /* Maximum weighted matching algorithm is switched-off (default for symmetric). Try m_iparm[12] = 1 in case of inappropriate accuracy */ + m_iparm[13] = 0; /* Output: Number of perturbed pivots */ + m_iparm[14] = 0; /* Not in use */ + m_iparm[15] = 0; /* Not in use */ + m_iparm[16] = 0; /* Not in use */ + m_iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ + m_iparm[18] = -1; /* Output: Mflops for LU factorization */ + m_iparm[19] = 0; /* Output: Numbers of CG Iterations */ + m_iparm[20] = 0; /* 1x1 pivoting */ + m_iparm[26] = 0; /* No matrix checker */ + m_iparm[27] = (sizeof(RealScalar) == 4) ? 
1 : 0; + m_iparm[34] = 0; /* Fortran indexing */ + m_iparm[59] = 1; /* Automatic switch between In-Core and Out-of-Core modes */ + + m_perm.resize(n); + if(orderingMethod() == NaturalOrdering) + { + for(Index i = 0; i < n; i++) + m_perm[i] = i; + } + + m_matrix = a; + + /* Convert to Fortran-style indexing */ + for(i = 0; i <= m_matrix.rows(); ++i) + ++m_matrix._outerIndexPtr()[i]; + for(i = 0; i < m_matrix.nonZeros(); ++i) + ++m_matrix._innerIndexPtr()[i]; + + Index error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, n, + m_matrix._valuePtr(), m_matrix._outerIndexPtr(), m_matrix._innerIndexPtr(), + m_perm.data(), 0, m_iparm, m_msglvl, NULL, NULL); + + switch(error) + { + case 0: + m_succeeded = true; + m_info = Success; + return derived(); + case -4: + case -7: + m_info = NumericalIssue; + break; + default: + m_info = InvalidInput; + } + m_succeeded = false; + return derived(); +} + +template +template +bool PardisoImpl::_solve(const MatrixBase &b, + MatrixBase& x, const int transposed) const +{ + if(m_iparm[0] == 0) // Factorization was not computed + return false; + + Index n = m_matrix.rows(); + Index nrhs = b.cols(); + eigen_assert(n==b.rows()); + eigen_assert(((MatrixBase::Flags & RowMajorBit) == 0 || nrhs == 1) && "Row-major right hand sides are not supported"); + eigen_assert(((MatrixBase::Flags & RowMajorBit) == 0 || nrhs == 1) && "Row-major matrices of unknowns are not supported"); + eigen_assert(((nrhs == 1) || b.outerStride() == b.rows())); + + x.derived().resizeLike(b); + + switch (transposed) { + case SvNoTrans : m_iparm[11] = 0 ; break; + case SvTranspose : m_iparm[11] = 2 ; break; + case SvAdjoint : m_iparm[11] = 1 ; break; + default: + //std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the PARDISO backend\n"; + m_iparm[11] = 0; + } + + Index error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, n, + m_matrix._valuePtr(), m_matrix._outerIndexPtr(), m_matrix._innerIndexPtr(), + m_perm.data(), nrhs, m_iparm, m_msglvl, const_cast(&b(0, 0)), &x(0, 0)); + + return error==0; +} + + +template +class PardisoLU : public PardisoImpl< PardisoLU > +{ + protected: + typedef PardisoImpl< PardisoLU > Base; + typedef typename Base::Scalar Scalar; + typedef typename Base::RealScalar RealScalar; + using Base::m_type; + + public: + + using Base::compute; + using Base::solve; + + PardisoLU(int flags = Metis) + : Base(flags) + { + m_type = Base::ScalarIsComplex ? 13 : 11; + } + + PardisoLU(const MatrixType& matrix, int flags = Metis) + : Base(flags) + { + m_type = Base::ScalarIsComplex ? 13 : 11; + compute(matrix); + } +}; + +template +class PardisoLLT : public PardisoImpl< PardisoLLT > +{ + protected: + typedef PardisoImpl< PardisoLLT > Base; + typedef typename Base::Scalar Scalar; + typedef typename Base::RealScalar RealScalar; + using Base::m_type; + + public: + + using Base::compute; + using Base::solve; + + PardisoLLT(int flags = Metis) + : Base(flags) + { + m_type = Base::ScalarIsComplex ? 4 : 2; + } + + PardisoLLT(const MatrixType& matrix, int flags = Metis) + : Base(flags) + { + m_type = Base::ScalarIsComplex ? 4 : 2; + compute(matrix); + } +}; + +template +class PardisoLDLT : public PardisoImpl< PardisoLDLT > +{ + protected: + typedef PardisoImpl< PardisoLDLT > Base; + typedef typename Base::Scalar Scalar; + typedef typename Base::RealScalar RealScalar; + using Base::m_type; + + public: + + using Base::compute; + using Base::solve; + + PardisoLDLT(int flags = Metis) + : Base(flags) + { + m_type = Base::ScalarIsComplex ? 
-4 : -2; + } + + PardisoLDLT(const MatrixType& matrix, int flags = Metis, bool hermitian = true) + : Base(flags) + { + compute(matrix, hermitian); + } + + void compute(const MatrixType& matrix, bool hermitian = true) + { + m_type = Base::ScalarIsComplex ? (hermitian ? -4 : 6) : -2; + Base::compute(matrix); + } + +}; + +namespace internal { + +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef PardisoImpl<_Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + solve_retval(const PardisoImpl<_Derived>& dec, const Rhs& rhs, const int transposed) + : Base(dec, rhs), m_transposed(transposed) {} + + template void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst,m_transposed); + } + + int m_transposed; +}; + +} + +#endif // EIGEN_PARDISOSUPPORT_H diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h new file mode 100644 index 000000000..9cf4d2fae --- /dev/null +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -0,0 +1,94 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Householder QR decomposition of a matrix with column pivoting based on + * LAPACKE_?geqp3 function. 
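+ *
+ * Illustrative only: for a column-major double matrix the specialization
+ * essentially performs
+ *
+ *   LAPACKE_dgeqp3(LAPACK_COL_MAJOR, m, n, a, lda, jpvt, tau);
+ *
+ * with jpvt receiving the 1-based column permutation and tau the Householder
+ * coefficients, which are then translated back to Eigen's 0-based, conjugated
+ * conventions.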
+ ******************************************************************************** +*/ + +#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H +#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ +template<> \ +ColPivHouseholderQR >& \ +ColPivHouseholderQR >::compute( \ + const Matrix& matrix) \ +\ +{ \ + typedef Matrix MatrixType; \ + typedef MatrixType::Scalar Scalar; \ + typedef MatrixType::RealScalar RealScalar; \ + Index rows = matrix.rows();\ + Index cols = matrix.cols();\ + Index size = matrix.diagonalSize();\ +\ + m_qr = matrix;\ + m_hCoeffs.resize(size);\ +\ + m_colsTranspositions.resize(cols);\ + Index number_of_transpositions = 0;\ +\ + m_nonzero_pivots = 0; \ + m_maxpivot = RealScalar(0);\ + m_colsPermutation.resize(cols); \ + m_colsPermutation.indices().setZero(); \ +\ + lapack_int lda = m_qr.outerStride(), i; \ + lapack_int matrix_order = MKLCOLROW; \ + LAPACKE_##MKLPREFIX##geqp3( matrix_order, rows, cols, (MKLTYPE*)m_qr.data(), lda, (lapack_int*)m_colsPermutation.indices().data(), (MKLTYPE*)m_hCoeffs.data()); \ + m_isInitialized = true; \ + m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \ + m_hCoeffs.adjointInPlace(); \ + RealScalar premultiplied_threshold = internal::abs(m_maxpivot) * threshold(); \ + lapack_int *perm = m_colsPermutation.indices().data(); \ + for(i=0;i premultiplied_threshold);\ + } \ + for(i=0;i \ +void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs, \ + typename MatrixQR::Index maxBlockSize=32, \ + EIGTYPE* tempData = 0) \ +{ \ + lapack_int m = mat.rows(); \ + lapack_int n = mat.cols(); \ + lapack_int lda = mat.outerStride(); \ + lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \ + hCoeffs.adjointInPlace(); \ +\ +} + +EIGEN_MKL_QR_NOPIV(double, double, d) +EIGEN_MKL_QR_NOPIV(float, float, s) +EIGEN_MKL_QR_NOPIV(dcomplex, MKL_Complex16, z) +EIGEN_MKL_QR_NOPIV(scomplex, MKL_Complex8, c) + +} // end namespace internal + +#endif // EIGEN_QR_MKL_H diff --git a/Eigen/src/SVD/JacobiSVD_MKL.h b/Eigen/src/SVD/JacobiSVD_MKL.h new file mode 100644 index 000000000..e6771b1b3 --- /dev/null +++ b/Eigen/src/SVD/JacobiSVD_MKL.h @@ -0,0 +1,88 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Singular Value Decomposition - SVD. + ******************************************************************************** +*/ + +#ifndef EIGEN_JACOBISVD_MKL_H +#define EIGEN_JACOBISVD_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +/** \internal Specialization for the data types supported by MKL */ + +#define EIGEN_MKL_SVD(EIGTYPE, MKLTYPE, MKLRTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ +template<> \ +JacobiSVD, ColPivHouseholderQRPreconditioner>& \ +JacobiSVD, ColPivHouseholderQRPreconditioner>::compute(const Matrix& matrix, unsigned int computationOptions) \ +{ \ + typedef Matrix MatrixType; \ + typedef MatrixType::Scalar Scalar; \ + typedef MatrixType::RealScalar RealScalar; \ + allocate(matrix.rows(), matrix.cols(), computationOptions); \ +\ + const RealScalar precision = RealScalar(2) * NumTraits::epsilon(); \ + m_nonzeroSingularValues = m_diagSize; \ +\ + lapack_int lda = matrix.outerStride(), ldu, ldvt; \ + lapack_int matrix_order = MKLCOLROW; \ + char jobu, jobvt; \ + MKLTYPE *u, *vt, dummy; \ + jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \ + jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \ + if (computeU()) { \ + ldu = m_matrixU.outerStride(); \ + u = (MKLTYPE*)m_matrixU.data(); \ + } else { ldu=1; u=&dummy; }\ + MatrixType localV; \ + ldvt = (m_computeFullV) ? m_cols : (m_computeThinV) ? 
m_diagSize : 1; \ + if (computeV()) { \ + localV.resize(ldvt, m_cols); \ + vt = (MKLTYPE*)localV.data(); \ + } else { ldvt=1; vt=&dummy; }\ + Matrix superb; superb.resize(m_diagSize, 1); \ + MatrixType m_temp; m_temp = matrix; \ + LAPACKE_##MKLPREFIX##gesvd( matrix_order, jobu, jobvt, m_rows, m_cols, (MKLTYPE*)m_temp.data(), lda, (MKLRTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ + if (computeV()) m_matrixV = localV.adjoint(); \ + /* for(int i=0;i::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::product_triangular_matrix_vector::run); + func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::product_triangular_matrix_vector::run); + func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); + func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::product_triangular_matrix_vector::run); + func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); + func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); init = true; } diff --git a/lapack/cholesky.cpp b/lapack/cholesky.cpp index c3a72c3c5..c51a8a29b 100644 --- a/lapack/cholesky.cpp +++ b/lapack/cholesky.cpp @@ -41,8 +41,8 @@ EIGEN_LAPACK_FUNC(potrf,(char* uplo, int *n, RealScalar *pa, int *lda, int *info Scalar* a = reinterpret_cast(pa); MatrixType A(a,*n,*n,*lda); int ret; - if(UPLO(*uplo)==UP) ret = internal::llt_inplace::blocked(A); - else ret = internal::llt_inplace::blocked(A); + if(UPLO(*uplo)==UP) ret = internal::llt_inplace::blocked(A); + else ret = internal::llt_inplace::blocked(A); if(ret>=0) *info = ret+1; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6b17ed941..ac2413ef3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -67,6 +67,7 @@ if(TEST_LIB) add_definitions("-DEIGEN_EXTERN_INSTANTIATIONS=1") endif(TEST_LIB) +ei_add_test(pardiso_support) ei_add_test(meta) ei_add_test(sizeof) ei_add_test(dynalloc) diff --git a/test/pardiso_support.cpp b/test/pardiso_support.cpp new 
file mode 100644
index 000000000..a6162b44f
--- /dev/null
+++ b/test/pardiso_support.cpp
@@ -0,0 +1,26 @@
+/*
+  Intel Copyright (C) ....
+*/
+
+#include "sparse_solver.h"
+#include <Eigen/PARDISOSupport>
+
+template<typename T> void test_pardiso_T()
+{
+  //PardisoLLT < SparseMatrix<T> > pardiso_llt;
+  //PardisoLDLT< SparseMatrix<T> > pardiso_ldlt;
+  PardisoLU < SparseMatrix<T> > pardiso_lu;
+
+  //check_sparse_spd_solving(pardiso_llt);
+  check_sparse_square_solving(pardiso_lu);
+}
+
+void test_pardiso_support()
+{
+  for(int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST_1(test_pardiso_T<float>());
+    CALL_SUBTEST_2(test_pardiso_T<double>());
+    CALL_SUBTEST_3(test_pardiso_T< std::complex<float> >());
+    CALL_SUBTEST_4(test_pardiso_T< std::complex<double> >());
+  }
+}
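
For reference, a minimal standalone driver for the PardisoLU path exercised by
the test above (hypothetical example, not part of the patch; it assumes MKL is
linked and the PARDISOSupport module is available):

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <Eigen/PARDISOSupport>
using namespace Eigen;

int main()
{
  // Small square system A x = b, stored sparsely.
  MatrixXd D(3,3);
  D << 4, 1, 0,
       1, 3, 1,
       0, 1, 2;
  SparseMatrix<double> A = D.sparseView();
  VectorXd b(3);
  b << 1, 2, 3;

  // Factorize with the MKL PARDISO backed LU and solve.
  PardisoLU<SparseMatrix<double> > solver;
  solver.compute(A);
  if(solver.info() != Success)
    return 1;                           // factorization failed
  VectorXd x = solver.solve(b);
  return (D*x - b).norm() < 1e-10 ? 0 : 1;
}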