diff --git a/.gitignore b/.gitignore index 353819bf6..4e7dd9eee 100644 --- a/.gitignore +++ b/.gitignore @@ -84,7 +84,6 @@ local.properties .springBeans # CMake generated files -tools/computecpp_info/common_device_compiler_flags.h build/ # Doxygen diff --git a/.scripts/build_computecpp.sh b/.scripts/build_computecpp.sh deleted file mode 100755 index 05bed8608..000000000 --- a/.scripts/build_computecpp.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -set -ev - -########################### -# Get ComputeCpp -########################### -wget https://computecpp.codeplay.com/downloads/computecpp-ce/latest/ubuntu-16.04-64bit.tar.gz -rm -rf /tmp/ComputeCpp-latest && mkdir /tmp/ComputeCpp-latest/ -tar -xzf ubuntu-16.04-64bit.tar.gz -C /tmp/ComputeCpp-latest --strip-components 1 -ls -R /tmp/ComputeCpp-latest/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bc93ff94..e5d73761e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,12 +106,6 @@ set(PORTBLAS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src) set(PORTBLAS_SRC_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/python_generator) list(APPEND THIRD_PARTIES_INCLUDE ${CBLAS_INCLUDE}) -if(IMGDNN_DIR) - add_definitions(-DIMGDNN_LIBRARY) - find_package(IMGDNN REQUIRED) - list(APPEND THIRD_PARTIES_INCLUDE ${IMGDNN_INCLUDE_DIRS}) -endif() - option(BLAS_ENABLE_EXTENSIONS "Whether to enable portBLAS extensions" ON) option(BLAS_ENABLE_COMPLEX "Whether to enable complex data type for GEMM" OFF) option(BLAS_ENABLE_HALF "Whether to enable sycl::half data type for supported operators" OFF) @@ -183,26 +177,19 @@ else() # The portblas target is just a collection of other objects so # the linked libraries are not going to be propagated to this target. # This requires manual linking against SYCL on Windows. - if(is_computecpp) - target_link_libraries(portblas PUBLIC ComputeCpp::ComputeCpp) - elseif(is_dpcpp) + if(is_dpcpp) target_link_libraries(portblas PUBLIC DPCPP::DPCPP) elseif(is_adaptivecpp) target_link_libraries(portblas PUBLIC AdaptiveCpp::acpp-rt) endif() endif() - if(is_computecpp) - set(sycl_impl ComputeCpp::ComputeCpp) - elseif(is_dpcpp) + if(is_dpcpp) set(sycl_impl DPCPP::DPCPP) add_sycl_to_target(TARGET portblas SOURCES) elseif(is_adaptivecpp) set(sycl_impl AdaptiveCpp::acpp-rt) add_sycl_to_target(TARGET portblas SOURCES) endif() - if(IMGDNN_DIR) - target_link_libraries(portblas PUBLIC IMGDNN::IMGDNN) - endif() set_target_properties(portblas PROPERTIES INTERFACE_LINK_LIBRARIES ${sycl_impl} INTERFACE_INCLUDE_DIRECTORIES "${PORTBLAS_INCLUDE}" diff --git a/README.md b/README.md index ef766dbc9..8c9675169 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,6 @@ the project. - [Instaling portBLAS](#instaling-portBLAS) - [Doxygen](#doxygen) - [CMake options](#cmake-options) - - [ComputeCpp Compilation (Deprecated)](#computecpp-deprecated) - - [Compile with ComputeCpp](#compile-with-computecpp) - - [POWER\_VR support](#power_vr-support-computecpp-only) - - [Cross-Compile](#cross-compile-computecpp-only) - [Tests and benchmarks](#tests-and-benchmarks) - [Contributing to the project](#contributing-to-the-project) - [Guides and Other Documents](#guides-and-other-documents) @@ -493,7 +489,7 @@ Some of the supported options are: |---|---|---| | `BLAS_ENABLE_TESTING` | `ON`/`OFF` | Set it to `OFF` to avoid building the tests (`ON` is the default value) | | `BLAS_ENABLE_BENCHMARK` | `ON`/`OFF` | Set it to `OFF` to avoid building the benchmarks (`ON` is the default value) | -| `SYCL_COMPILER` | name | Used to determine which SYCL implementation to use. By default, the first implementation found is used. Supported values are: `dpcpp`, `adaptivecpp` and `computecpp`*(deprecated)*. | +| `SYCL_COMPILER` | name | Used to determine which SYCL implementation to use. By default, the first implementation found is used. Supported values are: `dpcpp` and `adaptivecpp`. | | `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` | | `CMAKE_PREFIX_PATH` | path | List of paths to check when searching for dependencies | | `CMAKE_INSTALL_PREFIX` | path | Specify the install location, used when invoking `ninja install` | @@ -509,56 +505,6 @@ Some of the supported options are: | `BLAS_ENABLE_HALF` | `ON`/`OFF` | Determines whether to enable Half data type support *(Support is limited to some Level 1 operators and Gemm)* (`OFF` by default) | | `BLAS_INDEX_TYPES` | `int32_t;int64_t` | Determines the type(s) to use for `index_t` and `increment_t`. Default is `int` | -## ComputeCpp Compilation *(Deprecated)* - -portBLAS ComputeCpp compilation is deprecated since ComputeCpp releasing has been -discontinued. More information about this are found in this [announcement](https://codeplay.com/portal/news/2023/07/07/the-future-of-computecpp). - -### Compile with ComputeCpp - -```bash -cd build -cmake -GNinja ../ -DComputeCpp_DIR=/path/to/computecpp -DSYCL_COMPILER=computecpp -ninja -``` - -### Cross-Compile *(ComputeCpp Only)* - -To cross-compile portBLAS first the following environment variables must be -set: - -```bash -export COMPUTECPP_TOOLCHAIN_DIR="PATH TO TOOLCHAIN_DIR" -export COMPUTECPP_TARGET_TRIPLE="PATH TO TARGET_TRIPLE" -export COMPUTECPP_SYSROOT_DIR="$PATH TO SYSROOT_DIR" -``` - -Clone the [ComputeCpp-SDK](https://github.com/codeplaysoftware/computecpp-sdk) to retrieve the toolchain file. -The following CMake command can be used to cross-compile portBLAS: - -```bash -cmake -GNinja \ - ${SOURCE_ROOT} \ - -DCMAKE_PREFIX_PATH="${OPENBLAS_PATH}" \ - -DComputeCpp_DIR="${COMPUTECPP_DEVICE_PATH}" \ - -DComputeCpp_HOST_DIR="${COMPUTECPP_X86_PATH}" \ - -DCMAKE_TOOLCHAIN_FILE="/path/to/computecpp-sdk/cmake/toolchains/gcc-generic.cmake" \ - -DCMAKE_BUILD_TYPE='Release' \ - -DCMAKE_INSTALL_PREFIX=${CROSS_COMPILED_PORTBLAS_INSTALL} \ - -DOpenCL_INCLUDE_DIR="${OpenCL_Headers_PATH}" \ - -DOpenCL_LIBRARY="${OpenCL_LIBRARY}" \ - -DCOMPUTECPP_BITCODE="${DEVICE_BITCODE}" \ - -DCMAKE_CXX_FLAGS='-O3' \ - -DTUNING_TARGET="${CHOSEN_TARGET}" -``` - -### POWER_VR support *(ComputeCpp Only)* - -To enable the PowerVR target tuning, pass: `-DTUNING_TARGET=POWER_VR` - -To use the neural network library from Imagination, pass: `-DIMGDNN_DIR=path/to/library` - - ## Tests and benchmarks The tests and benchmarks have their own documentation: diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index bc93270b2..208e5119a 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -100,34 +100,7 @@ add_custom_command(OUTPUT git_config.h ${CMAKE_CURRENT_BINARY_DIR} ) -# Assume ComputeCpp not available by default. -set(ComputeCpp_INFO_AVAILABLE false) -set(ComputeCpp_VERSION_NUMBER "N/A") -set(ComputeCpp_EDITION "N/A") - -if(is_computecpp) - execute_process(COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} "--version" - OUTPUT_VARIABLE ComputeCpp_DEVICE_COMPILER_VERSION - RESULT_VARIABLE ComputeCpp_DEVICE_COMPILER_EXECUTABLE_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT ComputeCpp_DEVICE_COMPILER_EXECUTABLE_RESULT EQUAL "0") - message(WARNING "Compute++ not found - Error obtaining device compiler and ComputeCpp version!") - else() - # Store information about ComputeCpp/compiler for benchmarking. - set(ComputeCpp_INFO_AVAILABLE true) - string(REGEX MATCH - "(CE|PE|RC)" ComputeCpp_EDITION ${ComputeCpp_DEVICE_COMPILER_VERSION}) - if(${ComputeCpp_EDITION} STREQUAL "RC") - set(ComputeCpp_EDITION "Internal") - endif() - string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" - ComputeCpp_VERSION_NUMBER ${ComputeCpp_DEVICE_COMPILER_VERSION}) - endif() -endif() - -configure_file(computecpp_version_config.h.in computecpp_version_config.h @ONLY) - -add_library(bench_info STATIC bench_info.cc git_config.h computecpp_version_config.h) +add_library(bench_info STATIC bench_info.cc git_config.h) target_include_directories(bench_info PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if(VERBOSE) diff --git a/benchmark/bench_info.cc b/benchmark/bench_info.cc index 2bc6a4009..f70f829b9 100644 --- a/benchmark/bench_info.cc +++ b/benchmark/bench_info.cc @@ -22,5 +22,4 @@ * @filename bench_info.cc * **************************************************************************/ -#include "computecpp_version_config.h" #include "git_config.h" diff --git a/benchmark/computecpp_version_config.h.in b/benchmark/computecpp_version_config.h.in deleted file mode 100644 index d05439c5f..000000000 --- a/benchmark/computecpp_version_config.h.in +++ /dev/null @@ -1,34 +0,0 @@ -/************************************************************************** - * - * @license - * Copyright (C) Codeplay Software Limited - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * For your convenience, a copy of the License has been included in this - * repository. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * portBLAS: BLAS implementation using SYCL - * - * @filename computecpp_version_config.h.in - * - **************************************************************************/ - -#ifndef COMPUTECPP_VERSION_CONFIG_H_ -#define COMPUTECPP_VERSION_CONFIG_H_ - -extern bool const computecpp_available = @ComputeCpp_INFO_AVAILABLE@; - -extern char const* const computecpp_version = "@ComputeCpp_VERSION_NUMBER@"; -extern char const* const computecpp_edition = "@ComputeCpp_EDITION@"; - -#endif // COMPUTECPP_VERSION_CONFIG_H_ diff --git a/cmake/CmakeFunctionHelper.cmake b/cmake/CmakeFunctionHelper.cmake index 7da14fde9..4a02655a3 100644 --- a/cmake/CmakeFunctionHelper.cmake +++ b/cmake/CmakeFunctionHelper.cmake @@ -93,8 +93,6 @@ function(set_target_compile_def in_target) target_compile_definitions(${in_target} PUBLIC INTEL_GPU=1) elseif(${TUNING_TARGET} STREQUAL "AMD_GPU") target_compile_definitions(${in_target} PUBLIC AMD_GPU=1) - elseif(${TUNING_TARGET} STREQUAL "POWER_VR") - target_compile_definitions(${in_target} PUBLIC POWER_VR=1) elseif(${TUNING_TARGET} STREQUAL "NVIDIA_GPU") target_compile_definitions(${in_target} PUBLIC NVIDIA_GPU=1) else() @@ -501,31 +499,6 @@ if(${TUNING_TARGET} STREQUAL "INTEL_GPU") endif() endforeach() endif() # BLAS_ENABLE_COMPLEX -elseif(${TUNING_TARGET} STREQUAL "POWER_VR" AND NOT IMGDNN_DIR) - set(supported_types - "float" - "half" - ) - foreach(data ${supported_types}) - add_gemm_configuration( - "${data}" 96 "true" "false" "false" - 16 4 6 12 8 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false") - add_gemm_configuration( - "${data}" 64 "false" "false" "false" - 128 1 1 8 8 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false") - add_gemm_configuration( - "${data}" 64 "false" "false" "false" - 64 4 4 8 8 1 1 1 1 1 1 1 1 1 float float "no_local" "standard" "full" 1 "strided" "false") - add_gemm_configuration( - "${data}" 128 "false" "false" "false" - 16 4 8 16 8 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false") - add_gemm_configuration( - "${data}" 64 "false" "false" "false" - 32 4 4 8 8 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false") - add_gemm_configuration( - "${data}" 64 "false" "false" "false" - 64 4 4 4 4 1 1 1 1 4 4 1 1 1 float float "no_local" "standard" "full" 4 "interleaved" "false") - endforeach() elseif(${TUNING_TARGET} STREQUAL "AMD_GPU") # need investigation set(supported_types "float" @@ -749,7 +722,7 @@ else() # default cpu backend endif() add_library(${func} OBJECT ${gemm_sources}) set_target_compile_def(${func}) -# The blas library depends on FindComputeCpp + target_include_directories(${func} PRIVATE ${PORTBLAS_SRC} ${PORTBLAS_INCLUDE} ${PORTBLAS_COMMON_INCLUDE_DIR} ${THIRD_PARTIES_INCLUDE}) message(STATUS "Adding SYCL to target ${func}") diff --git a/cmake/Modules/FindComputeCpp.cmake b/cmake/Modules/FindComputeCpp.cmake deleted file mode 100644 index 74b61a979..000000000 --- a/cmake/Modules/FindComputeCpp.cmake +++ /dev/null @@ -1,555 +0,0 @@ -#.rst: -# FindComputeCpp -#--------------- -# -# Copyright Codeplay Software Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use these files except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -######################### -# FindComputeCpp.cmake -######################### -# -# Tools for finding and building with ComputeCpp. -# -# User must define ComputeCpp_DIR pointing to the ComputeCpp -# installation. -# -# Latest version of this file can be found at: -# https://github.com/codeplaysoftware/computecpp-sdk - -cmake_minimum_required(VERSION 3.10.2) -include(FindPackageHandleStandardArgs) - -# These should match the types of IR output by compute++ -set(IR_MAP_spir bc) -set(IR_MAP_spir64 bc) -set(IR_MAP_spir32 bc) -set(IR_MAP_spirv spv) -set(IR_MAP_spirv64 spv) -set(IR_MAP_spirv32 spv) -set(IR_MAP_aorta-x86_64 o) -set(IR_MAP_aorta-aarch64 o) -set(IR_MAP_aorta-rcar-cve o) -set(IR_MAP_custom-spir64 bc) -set(IR_MAP_custom-spir32 bc) -set(IR_MAP_custom-spirv64 spv) -set(IR_MAP_custom-spirv32 spv) -set(IR_MAP_ptx64 s) -set(IR_MAP_amdgcn s) - -# Retrieves the filename extension of the IR output of compute++ -function(get_sycl_target_extension output) - set(syclExtension ${IR_MAP_${COMPUTECPP_BITCODE}}) - if(NOT syclExtension) - # Needed when using multiple device targets - set(syclExtension "bc") - endif() - set(${output} ${syclExtension} PARENT_SCOPE) -endfunction() - -set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++") -separate_arguments(COMPUTECPP_USER_FLAGS) -mark_as_advanced(COMPUTECPP_USER_FLAGS) - -set(COMPUTECPP_BITCODE "" CACHE STRING "") -mark_as_advanced(COMPUTECPP_BITCODE) - -if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.20) - # Policy enabling rewrites of paths in depfiles when using ninja - cmake_policy(SET CMP0116 NEW) -endif() - -set(SYCL_LANGUAGE_VERSION "2017" CACHE STRING "SYCL version to use. Defaults to 1.2.1.") - -find_package(OpenCL REQUIRED) - -# Find ComputeCpp package - -set(computecpp_find_hint - "${ComputeCpp_DIR}" - "$ENV{COMPUTECPP_DIR}" -) - -# Used for running executables on the host -set(computecpp_host_find_hint ${computecpp_find_hint}) - -if(CMAKE_CROSSCOMPILING) - # ComputeCpp_HOST_DIR is used to find executables that are run on the host - set(computecpp_host_find_hint - "${ComputeCpp_HOST_DIR}" - "$ENV{COMPUTECPP_HOST_DIR}" - ${computecpp_find_hint} - ) -endif() - -find_program(ComputeCpp_DEVICE_COMPILER_EXECUTABLE compute++ - HINTS ${computecpp_host_find_hint} - PATH_SUFFIXES bin - NO_SYSTEM_ENVIRONMENT_PATH) - -find_program(ComputeCpp_INFO_EXECUTABLE computecpp_info - HINTS ${computecpp_host_find_hint} - PATH_SUFFIXES bin - NO_SYSTEM_ENVIRONMENT_PATH) - -find_library(COMPUTECPP_RUNTIME_LIBRARY - NAMES ComputeCpp - HINTS ${computecpp_find_hint} - PATH_SUFFIXES lib - DOC "ComputeCpp Runtime Library") - -# Found the library, use only single hint from now on -get_filename_component(computecpp_library_path "${COMPUTECPP_RUNTIME_LIBRARY}" DIRECTORY) -get_filename_component(computecpp_find_hint "${computecpp_library_path}/.." ABSOLUTE) - -find_library(COMPUTECPP_RUNTIME_LIBRARY_DEBUG - NAMES ComputeCpp_d ComputeCpp - HINTS ${computecpp_find_hint} - PATH_SUFFIXES lib - DOC "ComputeCpp Debug Runtime Library") - -find_path(ComputeCpp_INCLUDE_DIRS - NAMES "CL/sycl.hpp" - HINTS ${computecpp_find_hint}/include - DOC "The ComputeCpp include directory") -get_filename_component(ComputeCpp_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS} ABSOLUTE) - -get_filename_component(computecpp_canonical_root_dir "${ComputeCpp_INCLUDE_DIRS}/.." ABSOLUTE) -set(ComputeCpp_ROOT_DIR "${computecpp_canonical_root_dir}" CACHE PATH - "The root of the ComputeCpp install") - -if(NOT ComputeCpp_INFO_EXECUTABLE) - message(WARNING "Can't find computecpp_info - check ComputeCpp_DIR") -else() - execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-version" - OUTPUT_VARIABLE ComputeCpp_VERSION - RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") - message(WARNING "Package version - Error obtaining version!") - endif() - - execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-is-supported" - OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED - RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") - message(WARNING "platform - Error checking platform support!") - else() - mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) - if (COMPUTECPP_PLATFORM_IS_SUPPORTED) - message(STATUS "platform - your system can support ComputeCpp") - else() - message(STATUS "platform - your system is not officially supported") - endif() - endif() -endif() - -find_package_handle_standard_args(ComputeCpp - REQUIRED_VARS ComputeCpp_ROOT_DIR - ComputeCpp_DEVICE_COMPILER_EXECUTABLE - ComputeCpp_INFO_EXECUTABLE - COMPUTECPP_RUNTIME_LIBRARY - COMPUTECPP_RUNTIME_LIBRARY_DEBUG - ComputeCpp_INCLUDE_DIRS - VERSION_VAR ComputeCpp_VERSION) -mark_as_advanced(ComputeCpp_ROOT_DIR - ComputeCpp_DEVICE_COMPILER_EXECUTABLE - ComputeCpp_INFO_EXECUTABLE - COMPUTECPP_RUNTIME_LIBRARY - COMPUTECPP_RUNTIME_LIBRARY_DEBUG - ComputeCpp_INCLUDE_DIRS - ComputeCpp_VERSION) - -if(NOT ComputeCpp_FOUND) - return() -endif() - -list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -O2 -mllvm -inline-threshold=1000 -intelspirmetadata) -mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS) - -if(CMAKE_CROSSCOMPILING) - if(NOT COMPUTECPP_DONT_USE_TOOLCHAIN) - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --gcc-toolchain=${COMPUTECPP_TOOLCHAIN_DIR}) - endif() - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --sysroot=${COMPUTECPP_SYSROOT_DIR}) - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -target ${COMPUTECPP_TARGET_TRIPLE}) -endif() - -list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -DSYCL_LANGUAGE_VERSION=${SYCL_LANGUAGE_VERSION}) - -foreach (bitcode IN ITEMS ${COMPUTECPP_BITCODE}) - if(NOT "${bitcode}" STREQUAL "") - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -sycl-target ${bitcode}) - endif() -endforeach() - -message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") - -if(CMAKE_COMPILER_IS_GNUCXX) - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - message(FATAL_ERROR "host compiler - gcc version must be > 4.8") - endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) - message(FATAL_ERROR "host compiler - clang version must be > 3.6") - endif() -endif() - -if(MSVC) - set(ComputeCpp_STL_CHECK_SRC __STL_check) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp - "#include \n" - "int main() { return 0; }\n") - set(_stl_test_command ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} - -sycl - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - -isystem ${ComputeCpp_INCLUDE_DIRS} - -isystem ${OpenCL_INCLUDE_DIRS} - -o ${ComputeCpp_STL_CHECK_SRC}.sycl - -c ${ComputeCpp_STL_CHECK_SRC}.cpp) - execute_process( - COMMAND ${_stl_test_command} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT - ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT - OUTPUT_QUIET) - if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) - # Try disabling compiler version checks - execute_process( - COMMAND ${_stl_test_command} - -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT - ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT - OUTPUT_QUIET) - if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) - # Try again with __CUDACC__ and _HAS_CONDITIONAL_EXPLICIT=0. This relaxes the restritions in the MSVC headers - execute_process( - COMMAND ${_stl_test_command} - -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH - -D_HAS_CONDITIONAL_EXPLICIT=0 - -D__CUDACC__ - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT - ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT - OUTPUT_QUIET) - if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) - message(FATAL_ERROR "compute++ cannot consume hosted STL headers. This means that compute++ can't \ - compile a simple program in this platform and will fail when used in this system. \ - \n ${ComputeCpp_STL_CHECK_ERROR_OUTPUT}") - else() - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH - -D_HAS_CONDITIONAL_EXPLICIT=0 - -D__CUDACC__) - endif() - else() - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH) - endif() - endif() - file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp - ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl) -endif(MSVC) - -if(NOT TARGET OpenCL::OpenCL) - add_library(OpenCL::OpenCL UNKNOWN IMPORTED) - set_target_properties(OpenCL::OpenCL PROPERTIES - IMPORTED_LOCATION "${OpenCL_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}" - ) -endif() - -if(NOT TARGET ComputeCpp::ComputeCpp) - add_library(ComputeCpp::ComputeCpp UNKNOWN IMPORTED) - set_target_properties(ComputeCpp::ComputeCpp PROPERTIES - IMPORTED_LOCATION_DEBUG "${COMPUTECPP_RUNTIME_LIBRARY_DEBUG}" - IMPORTED_LOCATION_RELWITHDEBINFO "${COMPUTECPP_RUNTIME_LIBRARY}" - IMPORTED_LOCATION "${COMPUTECPP_RUNTIME_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${ComputeCpp_INCLUDE_DIRS}" - INTERFACE_LINK_LIBRARIES "OpenCL::OpenCL" - ) -endif() - -# This property allows targets to specify that their sources should be -# compiled with the integration header included after the user's -# sources, not before (e.g. when an enum is used in a kernel name, this -# is not technically valid SYCL code but can work with ComputeCpp) -define_property( - TARGET PROPERTY COMPUTECPP_INCLUDE_AFTER - BRIEF_DOCS "Include integration header after user source" - FULL_DOCS "Changes compiler arguments such that the source file is - actually the integration header, and the .cpp file is included on - the command line so that it is seen by the compiler first. Enables - non-standards-conformant SYCL code to compile with ComputeCpp." -) -define_property( - TARGET PROPERTY INTERFACE_COMPUTECPP_FLAGS - BRIEF_DOCS "Interface compile flags to provide compute++" - FULL_DOCS "Set additional compile flags to pass to compute++ when compiling - any target which links to this one." -) -define_property( - SOURCE PROPERTY COMPUTECPP_SOURCE_FLAGS - BRIEF_DOCS "Source file compile flags for compute++" - FULL_DOCS "Set additional compile flags for compiling the SYCL integration - header for the given source file." -) - -#################### -# __build_ir -#################### -# -# Adds a custom target for running compute++ and adding a dependency for the -# resulting integration header and kernel binary. -# -# TARGET : Name of the target. -# SOURCE : Source file to be compiled. -# COUNTER : Counter included in name of custom target. Different counter -# values prevent duplicated names of custom target when source files with -# the same name, but located in different directories, are used for the -# same target. -# -function(__build_ir) - set(options) - set(one_value_args - TARGET - SOURCE - COUNTER - ) - set(multi_value_args) - cmake_parse_arguments(ARG - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN} - ) - get_filename_component(sourceFileName ${ARG_SOURCE} NAME) - - # Set the path to the integration header. - # The .sycl filename must depend on the target so that different targets - # using the same source file will be generated with a different rule. - set(baseSyclName ${CMAKE_CURRENT_BINARY_DIR}/${ARG_TARGET}_${sourceFileName}) - set(outputSyclFile ${baseSyclName}.sycl) - get_sycl_target_extension(targetExtension) - set(outputDeviceFile ${baseSyclName}.${targetExtension}) - set(depFileName ${baseSyclName}.sycl.d) - - set(include_directories "$") - set(compile_definitions "$") - set(generated_include_directories - $<$:-I$>) - set(generated_compile_definitions - $<$:-D$>) - - # Obtain language standard of the file - set(device_compiler_cxx_standard - "-std=c++$") - - get_property(source_compile_flags - SOURCE ${ARG_SOURCE} - PROPERTY COMPUTECPP_SOURCE_FLAGS - ) - separate_arguments(source_compile_flags) - if(source_compile_flags) - list(APPEND computecpp_source_flags ${source_compile_flags}) - endif() - - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS - ${device_compiler_cxx_standard} - ${COMPUTECPP_USER_FLAGS} - ${computecpp_source_flags} - ) - - set(ir_dependencies ${ARG_SOURCE}) - get_target_property(target_libraries ${ARG_TARGET} LINK_LIBRARIES) - if(target_libraries) - foreach(library ${target_libraries}) - if(TARGET ${library}) - list(APPEND ir_dependencies ${library}) - endif() - endforeach() - endif() - - # Depfile support was only added in CMake 3.7 - # CMake throws an error if it is unsupported by the generator (i. e. not ninja) - if((NOT CMAKE_VERSION VERSION_LESS 3.7.0) AND - CMAKE_GENERATOR MATCHES "Ninja") - file(RELATIVE_PATH relOutputFile ${CMAKE_BINARY_DIR} ${outputDeviceFile}) - set(generate_depfile -MMD -MF ${depFileName} -MT ${relOutputFile}) - set(enable_depfile DEPFILE ${depFileName}) - endif() - - # Add custom command for running compute++ - add_custom_command( - OUTPUT ${outputDeviceFile} ${outputSyclFile} - COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - "${generated_include_directories}" - "${generated_compile_definitions}" - -sycl-ih ${outputSyclFile} - -o ${outputDeviceFile} - -c ${ARG_SOURCE} - ${generate_depfile} - COMMAND_EXPAND_LISTS - DEPENDS ${ir_dependencies} - IMPLICIT_DEPENDS CXX ${ARG_SOURCE} - ${enable_depfile} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") - - # Name: (user-defined name)_(source file)_(counter)_ih - set(headerTargetName - ${ARG_TARGET}_${sourceFileName}_${ARG_COUNTER}_ih) - - if(NOT MSVC) - # Add a custom target for the generated integration header - add_custom_target(${headerTargetName} DEPENDS ${outputDeviceFile} ${outputSyclFile}) - add_dependencies(${ARG_TARGET} ${headerTargetName}) - endif() - - # This property can be set on a per-target basis to indicate that the - # integration header should appear after the main source listing - get_target_property(includeAfter ${ARG_TARGET} COMPUTECPP_INCLUDE_AFTER) - - if(includeAfter) - # Change the source file to the integration header - e.g. - # g++ -c source_file_name.cpp.sycl - get_target_property(current_sources ${ARG_TARGET} SOURCES) - # Remove absolute path to source file - list(REMOVE_ITEM current_sources ${ARG_SOURCE}) - # Remove relative path to source file - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" - rel_source_file ${ARG_SOURCE} - ) - list(REMOVE_ITEM current_sources ${rel_source_file}) - # Add SYCL header to source list - list(APPEND current_sources ${outputSyclFile}) - set_property(TARGET ${ARG_TARGET} - PROPERTY SOURCES ${current_sources}) - # CMake/gcc don't know what language a .sycl file is, so tell them - set_property(SOURCE ${outputSyclFile} PROPERTY LANGUAGE CXX) - set(includedFile ${ARG_SOURCE}) - set(cppFile ${outputSyclFile}) - else() - set_property(SOURCE ${outputSyclFile} PROPERTY HEADER_FILE_ONLY ON) - set(includedFile ${outputSyclFile}) - set(cppFile ${ARG_SOURCE}) - endif() - - # Force inclusion of the integration header for the host compiler - if(MSVC) - # Group SYCL files inside Visual Studio - source_group("SYCL" FILES ${outputSyclFile}) - - if(includeAfter) - # Allow the source file to be edited using Visual Studio. - # It will be added as a header file so it won't be compiled. - set_property(SOURCE ${ARG_SOURCE} PROPERTY HEADER_FILE_ONLY true) - endif() - - # Add both source and the sycl files to the VS solution. - target_sources(${ARG_TARGET} PUBLIC ${ARG_SOURCE} ${outputSyclFile}) - - set(forceIncludeFlags "/FI${includedFile} /TP") - else() - set(forceIncludeFlags "-include ${includedFile} -x c++") - endif() - - set_property( - SOURCE ${cppFile} - APPEND_STRING PROPERTY COMPILE_FLAGS "${forceIncludeFlags}" - ) - -endfunction(__build_ir) - -####################### -# add_sycl_to_target -####################### -# -# Adds a SYCL compilation custom command associated with an existing -# target and sets a dependancy on that new command. -# -# TARGET : Name of the target to add SYCL to. -# SOURCES : Source files to be compiled for SYCL. -# -function(add_sycl_to_target) - set(options) - set(one_value_args - TARGET - ) - set(multi_value_args - SOURCES - ) - cmake_parse_arguments(ARG - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN} - ) - if ("${ARG_SOURCES}" STREQUAL "") - message(WARNING "No source files provided to add_sycl_to_target. " - "SYCL integration headers may not be generated.") - endif() - set_target_properties(${ARG_TARGET} PROPERTIES LINKER_LANGUAGE CXX) - - # If the CXX compiler is set to compute++ enable the driver. - get_filename_component(cmakeCxxCompilerFileName "${CMAKE_CXX_COMPILER}" NAME) - if("${cmakeCxxCompilerFileName}" STREQUAL "compute++") - if(MSVC) - message(FATAL_ERROR "The compiler driver is not supported by this system, - revert the CXX compiler to your default host compiler.") - endif() - - get_target_property(includeAfter ${ARG_TARGET} COMPUTECPP_INCLUDE_AFTER) - if(includeAfter) - list(APPEND COMPUTECPP_USER_FLAGS -fsycl-ih-last) - endif() - list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl-driver) - # Prepend COMPUTECPP_DEVICE_COMPILER_FLAGS and append COMPUTECPP_USER_FLAGS - foreach(prop COMPILE_OPTIONS INTERFACE_COMPILE_OPTIONS) - get_target_property(target_compile_options ${ARG_TARGET} ${prop}) - if(NOT target_compile_options) - set(target_compile_options "") - endif() - set_property( - TARGET ${ARG_TARGET} - PROPERTY ${prop} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - ${target_compile_options} - ${COMPUTECPP_USER_FLAGS} - ) - endforeach() - else() - set(fileCounter 0) - list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl) - # Add custom target to run compute++ and generate the integration header - foreach(sourceFile ${ARG_SOURCES}) - if(NOT IS_ABSOLUTE ${sourceFile}) - set(sourceFile "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}") - endif() - __build_ir( - TARGET ${ARG_TARGET} - SOURCE ${sourceFile} - COUNTER ${fileCounter} - ) - MATH(EXPR fileCounter "${fileCounter} + 1") - endforeach() - endif() - - set_property(TARGET ${ARG_TARGET} - APPEND PROPERTY LINK_LIBRARIES ComputeCpp::ComputeCpp) - set_property(TARGET ${ARG_TARGET} - APPEND PROPERTY INTERFACE_LINK_LIBRARIES ComputeCpp::ComputeCpp) - target_compile_definitions(${ARG_TARGET} PUBLIC - SYCL_LANGUAGE_VERSION=${SYCL_LANGUAGE_VERSION}) -endfunction(add_sycl_to_target) diff --git a/cmake/Modules/FindIMGDNN.cmake b/cmake/Modules/FindIMGDNN.cmake deleted file mode 100644 index 9311a477e..000000000 --- a/cmake/Modules/FindIMGDNN.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# FindIMGDNN.cmake -# Expects either to find IMGDNN in system directories, or at the location -# specified in IMGDNN_DIR. Outputs variables IMGDNN_INCLUDE_DIRS, -# IMGDNN_LIBRARIES and IMGDNN_ROOT_DIR. It also creates the target -# IMGDNN::IMGDNN, which can be linked against in the usual way. -cmake_minimum_required(VERSION 3.2.2) -include(FindPackageHandleStandardArgs) - -find_library(IMGDNN_LIBRARY - NAMES IMGDNN - HINTS ${IMGDNN_DIR} - PATH_SUFFIXES lib - DOC "The Imagination DNN library") - -find_path(IMGDNN_INCLUDE_DIR - NAMES "imgdnn/imgdnn.h" - HINTS ${IMGDNN_DIR} - DOC "The directory with imgdnn.h and cl.h") - -get_filename_component(imgdnn_canonical_dir "${IMGDNN_INCLUDE_DIR}/.." ABSOLUTE) -set(IMGDNN_ROOT_DIR "${imgdnn_canonical_dir}" CACHE PATH - "The IMGDNN library root") -mark_as_advanced(IMGDNN_ROOT_DIR - IMGDNN_INCLUDE_DIR - IMGDNN_LIBRARY) - -set(IMGDNN_INCLUDE_DIRS "${IMGDNN_INCLUDE_DIR}") -set(IMGDNN_LIBRARIES "${IMGDNN_LIBRARY}") -find_package_handle_standard_args(IMGDNN - REQUIRED_VARS IMGDNN_ROOT_DIR - IMGDNN_INCLUDE_DIRS - IMGDNN_LIBRARIES) - -if(IMGDNN_FOUND AND NOT TARGET IMGDNN::IMGDNN) - add_library(IMGDNN::IMGDNN UNKNOWN IMPORTED) - set_target_properties(IMGDNN::IMGDNN PROPERTIES - IMPORTED_LOCATION "${IMGDNN_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${IMGDNN_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "-Wl,--allow-shlib-undefined" -) -endif() diff --git a/cmake/Modules/SYCL.cmake b/cmake/Modules/SYCL.cmake index 54246a2c5..fb1fc0d1f 100644 --- a/cmake/Modules/SYCL.cmake +++ b/cmake/Modules/SYCL.cmake @@ -60,8 +60,6 @@ else() message(WARNING "Selected AdaptiveCpp as backend, but the compiler is not fully supported") endif() - elseif(SYCL_COMPILER MATCHES "computecpp") - set(is_computecpp ON) else() message(WARNING "SYCL_COMPILER <${SYCL_COMPILER}> is unknown.") endif() @@ -69,22 +67,7 @@ endif() message(STATUS "Using SYCL implementation: ${SYCL_COMPILER}") -if(is_computecpp) - find_package(ComputeCpp REQUIRED) - # Add some performance flags to the calls to compute++. - # NB: This must be after finding ComputeCpp - list(APPEND COMPUTECPP_USER_FLAGS - -O3 - -fsycl-split-modules=20 - -mllvm -inline-threshold=10000 - -Xclang -cl-mad-enable - # We add some flags to workaround OpenCL platform bugs, see ComputeCpp documentation - -no-serial-memop - ) - set(SYCL_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS}) - - -elseif(is_dpcpp) +if(is_dpcpp) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__SYCL_DISABLE_NAMESPACE_INLINE__=ON -O3 -Xclang -cl-mad-enable") if(NOT DEFINED DPCPP_SYCL_TARGET) diff --git a/common/include/common/set_benchmark_label.hpp b/common/include/common/set_benchmark_label.hpp index c684b3bc5..b707276bb 100644 --- a/common/include/common/set_benchmark_label.hpp +++ b/common/include/common/set_benchmark_label.hpp @@ -40,10 +40,6 @@ #include #endif -extern bool const computecpp_available; -extern char const* const computecpp_version; -extern char const* const computecpp_edition; - extern const char* commit_date; extern const char* commit_hash; @@ -60,7 +56,7 @@ namespace device_info { inline void add_device_info(cl::sycl::device const& device, std::map& key_value_map) { // OpenCL is unclear whether strings returned from clGet*Info() should be - // null terminated, and ComputeCpp currently copies embedded nulls. + // null terminated. // On some OpenCL implementations this results in strings that behave // unexpectedly when appended to. This lambda trims those strings. auto trim = [](std::string s) -> std::string { @@ -127,29 +123,6 @@ inline void add_device_info(std::map& key_value_map) { } // namespace device_info -namespace computecpp_info { - -/** - * Add ComputeCpp meta-data (if available) to the benchmark label. The - * version of compute++ is tied to the version of ComputeCpp, so the associated - * meta-data of compute++ will be the same. - * - * portBLAS benchmarks will include these attributes only if ComputeCpp info is - * available. Benchmarks from other libraries such as cublas etc. will not - * include them. - * - * \param [out] key_value_map The benchmark key value pair to hold the info. - */ -inline void add_computecpp_version( - std::map& key_value_map) { - if (computecpp_available) { - key_value_map["@computecpp-version"] = computecpp_version; - key_value_map["@computecpp-edition"] = computecpp_edition; - } -} - -} // namespace computecpp_info - namespace datatype_info { /** * Add the datatype used to the benchmark label. @@ -215,7 +188,6 @@ namespace internal { template inline void add_common_labels( std::map& key_value_map) { - computecpp_info::add_computecpp_version(key_value_map); datatype_info::add_datatype_info(key_value_map); key_value_map["@library"] = "portBLAS"; diff --git a/conanfile.py b/conanfile.py deleted file mode 100644 index f03457a2c..000000000 --- a/conanfile.py +++ /dev/null @@ -1,144 +0,0 @@ -from conans import ConanFile, tools, CMake, RunEnvironment -from conans.errors import ConanInvalidConfiguration, ConanException -import os - - -class PortBlasConan(ConanFile): - name = "portBLAS" - version = "1.0" - settings = "os", "compiler", "build_type", "arch" - description = "An implementation of BLAS using the SYCL open standard for acceleration on OpenCL devices" - url = "https://github.com/codeplaysoftware/portBLAS" - license = "Apache-2.0" - author = "Codeplay Software Ltd." - topics = ('sycl', 'blas') - - options = { - "shared": [True, False], - "fPIC": [True, False], - "acl_backend": ["neon", "opencl"], - "build_acl_benchmarks": [True, False], - "build_benchmarks": [True, False], - "build_clblast_benchmarks": [True, False], - "build_expression_tests": [True, False], - "build_testing": [True, False], - "sycl_target": "ANY", - } - default_options = { - "shared": False, - "fPIC": True, - "acl_backend": "opencl", - "build_acl_benchmarks": False, - "build_benchmarks": False, - "build_clblast_benchmarks": False, - "build_expression_tests": False, - "build_testing": False, - "khronos-opencl-icd-loader:shared": True, - "clblast:shared": True, - "sycl_target": "spirv64" - } - - scm = { - "type": "git", - "url": "auto", - "revision": "auto", - "submodule": "recursive", - } - - generators = "cmake" - - def dep(self, package, fallback_user="_", fallback_channel="_"): - """ - Helper function to switch between internal package forks and community packages - """ - try: - if self.user and self.channel: - return "%s@%s/%s" % (package, self.user, self.channel) - except ConanException: - pass - return "%s@%s/%s" % (package, fallback_user, fallback_channel) - - def config_options(self): - if self.settings.os == "Windows": - del self.options.fPIC - - def configure(self): - if not self.options.build_benchmarks: - if self.options.build_acl_benchmarks: - raise ConanInvalidConfiguration("build_acl_benchmarks requires build_benchmarks") - if self.options.build_clblast_benchmarks: - raise ConanInvalidConfiguration("build_clblast_benchmarks requires build_benchmarks") - if not self.options.build_testing: - if self.options.build_expression_tests: - raise ConanInvalidConfiguration("build_expression_tests requires build_testing") - - def build_requirements(self): - def build_dep(package, fallback_user="_", fallback_channel="_"): - return self.build_requires(self.dep(package, fallback_user, fallback_channel)) - if self.options.build_benchmarks: - build_dep("benchmark/1.5.0") - if self.options.build_acl_benchmarks: - build_dep("computelibrary/19.08", "mmha", "stable") - if self.options.build_clblast_benchmarks: - build_dep("clblast/1.5.0", "mmha", "stable") - if self.options.build_testing: - build_dep("gtest/1.10.0") - if self.options.build_testing or self.options.build_benchmarks: - build_dep("clara/1.1.5", "bincrafters", "stable") - build_dep("openblas/0.3.7", "mmha", "stable") - - def requirements(self): - self.requires(self.dep("khronos-opencl-icd-loader/20191007", "bincrafters", "stable"), override=True) - self.requires(self.dep("khronos-opencl-headers/20190806", "bincrafters", "stable"), override=True) - - def imports(self): - tools.get( - "https://computecpp.codeplay.com/downloads/computecpp-ce/1.1.6/ubuntu-16.04-64bit.tar.gz" - ) - - _cmake = None - - @property - def cmake(self): - if self._cmake is None: - self._cmake = CMake(self) - ccp_path = os.path.join(self.build_folder, - "ComputeCpp-CE-1.1.6-Ubuntu-16.04-x86_64") - clblast_benchmarks = self.options.build_clblast_benchmarks - - config = { - "ACL_BACKEND": str(self.options.acl_backend).upper(), - "BLAS_ENABLE_BENCHMARK": self.options.build_benchmarks, - "BLAS_ENABLE_TESTING": self.options.build_testing, - "BLAS_VERIFY_BENCHMARK": self.options.build_benchmarks, - "BUILD_ACL_BENCHMARKS": self.options.build_acl_benchmarks, - "BUILD_CLBLAST_BENCHMARKS": clblast_benchmarks, - "COMPUTECPP_BITCODE": self.options.sycl_target, - "ComputeCpp_DIR": ccp_path, - "ENABLE_EXPRESSION_TESTS": self.options.build_expression_tests, - } - - self._cmake.definitions.update(config) - with tools.environment_append(RunEnvironment(self).vars): - self._cmake.configure() - return self._cmake - - def build(self): - with tools.environment_append(RunEnvironment(self).vars): - self.cmake.build() - if self.options.build_testing: - self.cmake.test() - - def package(self): - self.cmake.install() - - def package_info(self): - self.cpp_info.libs = tools.collect_libs(self) - - def package_id(self): - del self.info.options.build_testing - del self.info.options.build_expression_tests - del self.info.options.build_benchmarks - del self.info.options.build_acl_benchmarks - del self.info.options.build_clblast_benchmarks - del self.info.options.acl_backend diff --git a/doc/Gemm.md b/doc/Gemm.md index 07b50ae68..f57e132a8 100644 --- a/doc/Gemm.md +++ b/doc/Gemm.md @@ -169,8 +169,6 @@ This cmake variable causes a corresponding define for the selected platform to b #include "interface/blas3/backend/intel_gpu.hpp" #elif defined AMD_GPU #include "interface/blas3/backend/amd_gpu.hpp" -#elif defined POWER_VR -#include "interface/blas3/backend/power_vr.hpp" #else #include "interface/blas3/backend/default.hpp" #endif diff --git a/include/operations/blas_constants.h b/include/operations/blas_constants.h index 82a8e0beb..233fdebdd 100644 --- a/include/operations/blas_constants.h +++ b/include/operations/blas_constants.h @@ -69,26 +69,12 @@ struct IndexValueTuple { index_t ind; value_t val; - // This operator is required due to a ComputeCPP bug - // (If the RHS of this operator is static const, then llvm.memcpy is broken) - constexpr IndexValueTuple(const IndexValueTuple &other) - : val(other.val), ind(other.ind) {} - constexpr explicit IndexValueTuple(index_t _ind, value_t _val) - : ind(_ind), val(_val){}; + : ind(_ind), val(_val) {}; PORTBLAS_INLINE index_t get_index() const { return ind; } PORTBLAS_INLINE typename GetTupleValue::return_t get_value() const { return GetTupleValue::get(val); } - // This operator is required due to a ComputeCPP bug - // (If the RHS of this operator is static const, then llvm.memcpy is broken) - IndexValueTuple &operator=( - const IndexValueTuple &other) { - val = other.val; - ind = other.ind; - - return *this; - } }; /*! @@ -263,18 +249,4 @@ struct constant_pair { } // namespace blas -#ifndef __ADAPTIVECPP__ -template -struct cl::sycl::is_device_copyable> - : std::true_type {}; - -template -struct cl::sycl::is_device_copyable> - : std::true_type {}; - -template -struct std::is_trivially_copyable> - : std::true_type {}; -#endif - #endif // BLAS_CONSTANTS_H diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index a1392057f..9ddc881b6 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -59,9 +59,6 @@ foreach(src_file ${SAMPLES_LIST}) SOURCES ${src_file} ) target_link_libraries(${sample_exec} PRIVATE PORTBLAS::PORTBLAS) - if(IMGDNN_DIR) - target_link_libraries(${sample_exec} PRIVATE IMGDNN::IMGDNN) - endif() install(TARGETS ${sample_exec} RUNTIME DESTINATION bin) endforeach() include_directories(${PORTBLAS_INCLUDE} ${SYCL_INCLUDE_DIRS} ${THIRD_PARTIES_INCLUDE}) diff --git a/samples/README.md b/samples/README.md index 412c8ee2b..4ff465fed 100644 --- a/samples/README.md +++ b/samples/README.md @@ -2,7 +2,7 @@ portBLAS samples === ## How to compile the samples -A SYCL Compiler (DPCPP, hipSYCL or ComputeCpp) along with the target device's +A SYCL Compiler (DPCPP or AdaptiveCpp) along with the target device's relevant compute drivers *(OpenCL, CUDA etc..)* are required to compile and run the samples. Any project that integrates portBLAS can either use it as : @@ -13,7 +13,7 @@ Any project that integrates portBLAS can either use it as : This folder contains a basic CMake configuration file and a module to find portBLAS *(which will be used as a header-only framework)*. It also uses a module -to find the SYCL Compiler(DPCPP, hipSYCL or computeCpp *-deprecated-*) that is +to find the SYCL Compiler(DPCPP or AdaptiveCpp) that is located in the folder `cmake/Modules`. Sample usage with DPCPP Compiler: diff --git a/src/interface/blas2/backend/backend.hpp b/src/interface/blas2/backend/backend.hpp index 40cd96f6e..447a7281a 100644 --- a/src/interface/blas2/backend/backend.hpp +++ b/src/interface/blas2/backend/backend.hpp @@ -26,8 +26,6 @@ #include "interface/blas2/backend/intel_gpu.hpp" #elif AMD_GPU #include "interface/blas2/backend/amd_gpu.hpp" -#elif POWER_VR -#include "interface/blas2/backend/power_vr.hpp" #elif NVIDIA_GPU #include "interface/blas2/backend/nvidia_gpu.hpp" #else diff --git a/src/interface/blas2/backend/power_vr.hpp b/src/interface/blas2/backend/power_vr.hpp deleted file mode 100644 index 6b963987e..000000000 --- a/src/interface/blas2/backend/power_vr.hpp +++ /dev/null @@ -1,176 +0,0 @@ -/*************************************************************************** - * - * @license - * Copyright (C) Codeplay Software Limited - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * For your convenience, a copy of the License has been included in this - * repository. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * portBLAS: BLAS implementation using SYCL - * - * @filename power_vr.hpp - * - **************************************************************************/ -#ifndef PORTBLAS_GEMV_POWER_VR_BACKEND_HPP -#define PORTBLAS_GEMV_POWER_VR_BACKEND_HPP -#include "interface/blas2_interface.h" - -namespace blas { -namespace gemv { -namespace backend { -template -typename SB_Handle::event_t _gemv( - SB_Handle& sb_handle, index_t _M, index_t _N, element_t _alpha, - container_t0 _mA, index_t _lda, container_t1 _vx, increment_t _incx, - element_t _beta, container_t2 _vy, increment_t _incy, - const typename SB_Handle::event_t& _dependencies) { - if (trn == transpose_type::Normal) { - return blas::internal::_gemv_impl<256, 32, gemv_memory_t::local, trn>( - sb_handle, _M, _N, _alpha, _mA, _lda, _vx, _incx, _beta, _vy, _incy, - _dependencies); - } else { - return blas::internal::_gemv_impl<64, 32, gemv_memory_t::local, trn>( - sb_handle, _M, _N, _alpha, _mA, _lda, _vx, _incx, _beta, _vy, _incy, - _dependencies); - } -} -} // namespace backend -} // namespace gemv - -namespace gbmv { -namespace backend { -template -typename SB_Handle::event_t inline _gbmv( - SB_Handle& sb_handle, index_t _M, index_t _N, index_t _KL, index_t _KU, - element_t _alpha, container_t0 _mA, index_t _lda, container_t1 _vx, - increment_t _incx, element_t _beta, container_t2 _vy, increment_t _incy, - const typename SB_Handle::event_t& _dependencies) { - return blas::internal::_gbmv_impl<256, trn>(sb_handle, _M, _N, _KL, _KU, - _alpha, _mA, _lda, _vx, _incx, - _beta, _vy, _incy, _dependencies); -} -} // namespace backend -} // namespace gbmv - -namespace sbmv { -namespace backend { -template -typename SB_Handle::event_t inline _sbmv( - SB_Handle& sb_handle, index_t _N, index_t _K, element_t _alpha, - container_t0 _mA, index_t _lda, container_t1 _vx, increment_t _incx, - element_t _beta, container_t2 _vy, increment_t _incy, - const typename SB_Handle::event_t& _dependencies) { - return blas::internal::_sbmv_impl<256, uplo>(sb_handle, _N, _K, _alpha, _mA, - _lda, _vx, _incx, _beta, _vy, - _incy, _dependencies); -} -} // namespace backend -} // namespace sbmv - -namespace spmv { -namespace backend { -template -typename SB_Handle::event_t inline _spmv( - SB_Handle& sb_handle, index_t _N, element_t _alpha, container_t0 _mA, - container_t1 _vx, increment_t _incx, element_t _beta, container_t2 _vy, - increment_t _incy, const typename SB_Handle::event_t& _dependencies) { - return blas::internal::_spmv_impl<32, 4, uplo>( - sb_handle, _N, _alpha, _mA, _vx, _incx, _beta, _vy, _incy, _dependencies); -} -} // namespace backend -} // namespace spmv - -namespace tbmv { -namespace backend { -template -typename sb_handle_t::event_t _tbmv( - sb_handle_t& sb_handle, index_t _N, index_t _K, container_t0 _mA, - index_t _lda, container_t1 _vx, increment_t _incx, - typename sb_handle_t::event_t _dependencies) { - return blas::internal::_tbmv_impl<256, uplo, trn, diag>( - sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies); -} -} // namespace backend -} // namespace tbmv - -namespace tpmv { -namespace backend { -template -typename sb_handle_t::event_t _tpmv( - sb_handle_t& sb_handle, index_t _N, container_t0 _mA, container_t1 _vx, - increment_t _incx, typename sb_handle_t::event_t _dependencies) { - return blas::internal::_tpmv_impl<32, 4, uplo, trn, diag>( - sb_handle, _N, _mA, _vx, _incx, _dependencies); -} -} // namespace backend -} // namespace tpmv - -namespace trsv { -namespace backend { -template -typename sb_handle_t::event_t _trsv( - sb_handle_t& sb_handle, index_t _N, container_t0 _mA, index_t _lda, - container_t1 _vx, increment_t _incx, - typename sb_handle_t::event_t _dependencies) { - return blas::internal::_trsv_impl<64, 4, uplo, trn, diag>( - sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies); -} -} // namespace backend -} // namespace trsv - -namespace tbsv { -namespace backend { -template -typename sb_handle_t::event_t _tbsv(sb_handle_t& sb_handle, index_t _N, - index_t _K, container_t0 _mA, index_t _lda, - container_t1 _vx, increment_t _incx, - const typename sb_handle_t::event_t& _dependencies) { - return blas::internal::_tbsv_impl<4, 2, uplo, trn, diag>( - sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies); -} -} // namespace backend -} // namespace tbsv - -namespace tpsv { -namespace backend { -template -typename sb_handle_t::event_t _tpsv(sb_handle_t& sb_handle, index_t _N, - container_t0 _mA, container_t1 _vx, - increment_t _incx, - const typename sb_handle_t::event_t& _dependencies) { - return blas::internal::_tpsv_impl<4, 2, uplo, trn, diag>(sb_handle, _N, _mA, - _vx, _incx, _dependencies); -} -} // namespace backend -} // namespace tpsv -} // namespace blas -#endif diff --git a/src/interface/blas3/backend/backend.hpp b/src/interface/blas3/backend/backend.hpp index 44ba8e55e..494c334af 100644 --- a/src/interface/blas3/backend/backend.hpp +++ b/src/interface/blas3/backend/backend.hpp @@ -26,8 +26,6 @@ #include "interface/blas3/backend/intel_gpu.hpp" #elif defined AMD_GPU #include "interface/blas3/backend/amd_gpu.hpp" -#elif defined POWER_VR -#include "interface/blas3/backend/power_vr.hpp" #elif defined NVIDIA_GPU #include "interface/blas3/backend/nvidia_gpu.hpp" #else diff --git a/src/interface/blas3/backend/power_vr.hpp b/src/interface/blas3/backend/power_vr.hpp deleted file mode 100644 index 7b7b2473f..000000000 --- a/src/interface/blas3/backend/power_vr.hpp +++ /dev/null @@ -1,417 +0,0 @@ -/*************************************************************************** - * - * @license - * Copyright (C) Codeplay Software Limited - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * For your convenience, a copy of the License has been included in this - * repository. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * portBLAS: BLAS implementation using SYCL - * - * @filename power_vr.hpp - * - **************************************************************************/ -#ifndef PORTBLAS_GEMM_POWERVR_BACKEND_HPP -#define PORTBLAS_GEMM_POWERVR_BACKEND_HPP -#include "interface/gemm_launcher.h" - -#ifdef IMGDNN_LIBRARY -#include -#include -#include -#include -#endif - -namespace blas { -namespace gemm { -namespace backend { - -#ifdef IMGDNN_LIBRARY -namespace sycl_imagination_nn_api { -/*! - * @brief Select the correct transpose version of GemmFactory, depending on the - * runtime values of transpose. - */ -template -struct Gemm_Launcher { - template - static inline typename sb_handle_t::event_t _select_gemm( - sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, - value_t _alpha, container_0_t _A, container_1_t _B, value_t _beta, - container_2_t _C, index_t batch_size, - const typename sb_handle_t::event_t& _dependencies) { - auto m = static_cast(_M); - auto n = static_cast(_N); - auto k = static_cast(_K); - auto n_batches = static_cast(batch_size); - cl::sycl::event sycl_event; - // we swap the matrix as they are row major while the netlib blas require - // column major so C := alpha x (A * B) + (beta x C) will be equal to C := - // alphax (B * A) + (beta x C) - auto a_buffer = _A.get_buffer(); - auto b_buffer = _B.get_buffer(); - auto c_buffer = _C.get_buffer(); - auto interop_event = sb_handle.get_queue().submit([&](cl::sycl::codeplay:: - handler& cgh) { - auto a_acc = - a_buffer.template get_access(cgh); - auto b_acc = - b_buffer.template get_access(cgh); - auto c_acc = - c_buffer.template get_access(cgh); - cgh.interop_task([&, a_acc, b_acc, c_acc]( - const cl::sycl::codeplay::interop_handle& handle) { - auto m_cl_device = handle.get_device(); - auto m_cl_context = handle.get_context(); - imgdnn_device device_; - imgdnn_err_code imgdnn_err = IMGDNN_SUCCESS; - auto context_ = imgdnnCLCreateContext(m_cl_context, 1u, &m_cl_device, - 0u, &device_, &imgdnn_err); - - auto _A_cl_mem_object = imgdnnImportMemory( - context_, handle.get(a_acc), - m * k * sizeof(typename blas::ValueType::type), - IMGDNN_IMPORT_MEM_TYPE_OPENCL, nullptr); - - auto _B_cl_mem_object = imgdnnImportMemory( - context_, handle.get(b_acc), - n * k * sizeof(typename blas::ValueType::type), - IMGDNN_IMPORT_MEM_TYPE_OPENCL, nullptr); - - auto _C_cl_mem_object = imgdnnImportMemory( - context_, handle.get(c_acc), - m * n * sizeof(typename blas::ValueType::type), - IMGDNN_IMPORT_MEM_TYPE_OPENCL, nullptr); - - imgdnn_tensor_descriptor lhs_descriptor, rhs_descriptor, c_descriptor; - - if (TransB) { - lhs_descriptor = { - .dimensions = 3, - .type = IMGDNN_TYPE_F32, - .size = {n_batches, k, n}, - }; - } else { - lhs_descriptor = { - .dimensions = 3, - .type = IMGDNN_TYPE_F32, - .size = {n_batches, n, k}, - }; - } - if (TransA) { - rhs_descriptor = { - .dimensions = 3, - .type = IMGDNN_TYPE_F32, - .size = {n_batches, m, k}, - }; - } else { - rhs_descriptor = { - .dimensions = 3, - .type = IMGDNN_TYPE_F32, - .size = {n_batches, k, m}, - }; - } - - c_descriptor = { - .dimensions = 3, - .type = IMGDNN_TYPE_F32, - .size = {n_batches, n, m}, - }; - - imgdnn_tensor_descriptor scaling_tensor_descriptor = { - .dimensions = 3, .type = IMGDNN_TYPE_F32, .size = {1, 1, 1}}; - - auto network = imgdnnCreateNetwork(nullptr); - auto binding = imgdnnCreateBinding(nullptr); - auto lhs_input = - imgdnnNetworkInput(network, &lhs_descriptor, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE third input: " << imgdnn_err << std::endl; -#endif - auto rhs_input = - imgdnnNetworkInput(network, &rhs_descriptor, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE third input: " << imgdnn_err << std::endl; -#endif - - auto c_input = imgdnnNetworkInput(network, &c_descriptor, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE third input: " << imgdnn_err << std::endl; -#endif - imgdnn_tensor net_inputs[3] = {lhs_input, rhs_input, c_input}; - - const int new_order[3] = {0, 2, 1}; - - imgdnn_tensor matmul_inputs[2]; - matmul_inputs[0] = (TransB) - ? imgdnnNetworkTransposeOp( - network, lhs_input, new_order, &imgdnn_err) - : lhs_input; -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE transpose B: " << imgdnn_err << std::endl; -#endif - matmul_inputs[1] = (TransA) - ? imgdnnNetworkTransposeOp( - network, rhs_input, new_order, &imgdnn_err) - : rhs_input; -#ifdef BLAS_VERBOSE - - std::cout << "ERROR CODE transpose A: " << imgdnn_err << std::endl; -#endif - auto A_Times_B_result = - imgdnnNetworkBinaryOp(network, matmul_inputs[0], matmul_inputs[1], - IMGDNN_OPERATION_MATMUL, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE a times b: " << imgdnn_err << std::endl; -#endif - auto alpha_scaling = imgdnnNetworkFixedInput( - network, &scaling_tensor_descriptor, &_alpha, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE fix_input: " << imgdnn_err << std::endl; -#endif - auto matmul_result = - imgdnnNetworkBinaryOp(network, alpha_scaling, A_Times_B_result, - IMGDNN_OPERATION_MUL, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE create alpha: " << imgdnn_err << std::endl; -#endif - - imgdnn_tensor output_tensor = matmul_result; - - if (_beta != 0.f) { - auto beta_scaling = imgdnnNetworkFixedInput( - network, &scaling_tensor_descriptor, &_beta, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE create bets: " << imgdnn_err << std::endl; -#endif - imgdnn_tensor beta_scaled = - imgdnnNetworkBinaryOp(network, beta_scaling, c_input, - IMGDNN_OPERATION_MUL, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE res times beta: " << imgdnn_err << std::endl; -#endif - output_tensor = - imgdnnNetworkBinaryOp(network, beta_scaled, matmul_result, - IMGDNN_OPERATION_ADD, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE a times b plus c: " << imgdnn_err - << std::endl; -#endif - } - auto network_object = imgdnnCreateNetworkObject( - device_, context_, network, 3u, net_inputs, 1u, &output_tensor, 0u, - nullptr, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE create network object: " << imgdnn_err - << std::endl; -#endif - imgdnn_input inputs[3]; - imgdnn_err = - imgdnnNetworkObjectGetInputs(network_object, 3u, inputs, nullptr); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE get input objects: " << imgdnn_err - << std::endl; -#endif - imgdnn_output output; - imgdnn_err = - imgdnnNetworkObjectGetOutputs(network_object, 1u, &output, nullptr); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE get output objects: " << imgdnn_err - << std::endl; -#endif - // swap the inputs - imgdnn_err = - imgdnnBindingAddInput(binding, inputs[0], _B_cl_mem_object); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE bind input object 0: " << imgdnn_err - << std::endl; -#endif - // swap the inputs - imgdnn_err = - imgdnnBindingAddInput(binding, inputs[1], _A_cl_mem_object); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE bind input object 1: " << imgdnn_err - << std::endl; -#endif - imgdnn_err = - imgdnnBindingAddInput(binding, inputs[2], _C_cl_mem_object); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE bind input object 2: " << imgdnn_err - << std::endl; -#endif - imgdnn_err = imgdnnBindingAddOutput(binding, output, _C_cl_mem_object); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE bind output object: " << imgdnn_err - << std::endl; -#endif - imgdnn_event ev; - imgdnn_err = imgdnnNetworkObjectExecute(network_object, binding, true, - 0u, nullptr, &ev); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE get event object: " << imgdnn_err; -#endif - - cl_event cl_ev = imgdnnCLExportEvent(ev, &imgdnn_err); -#ifdef BLAS_VERBOSE - std::cout << "ERROR CODE export cl event object: " << imgdnn_err - << std::endl; - sycl_event = cl::sycl::event{cl_ev, m_cl_context}; -#endif - imgdnnBindingDestroy(binding); - imgdnnNetworkObjectDestroy(network_object); - imgdnnNetworkDestroy(network); - imgdnnEventDestroy(ev); - imgdnnMemoryDestroy(_A_cl_mem_object); - imgdnnMemoryDestroy(_B_cl_mem_object); - imgdnnMemoryDestroy(_C_cl_mem_object); - imgdnnContextDestroy(context_); - clReleaseEvent(cl_ev); - }); - }); - interop_event.wait(); - return {sycl_event}; - } -}; - -} // namespace sycl_imagination_nn_api - -#endif - -template -typename sb_handle_t::event_t _gemm( - sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, - element_t _alpha, container_0_t _a, index_t _lda, index_t _stridea, - container_1_t _b, index_t _ldb, index_t _strideb, element_t _beta, - container_2_t _c, index_t _ldc, index_t _stridec, index_t batch_size, - gemm_batch_type_t batch_type) { -#ifdef IMGDNN_LIBRARY - if (batch_type == gemm_batch_type_t::interleaved) { - std::cerr << "Error: interleaved gemm is not supported with IMGDNN" - << std::endl; - return {}; - } - return blas::gemm::backend::sycl_imagination_nn_api::Gemm_Launcher< - _t_a, _t_b>::template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _b, - _beta, _c, batch_size, _dependencies); -#else - if (batch_type == gemm_batch_type_t::interleaved) { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 64, false, false, false, - 64, Tile<4, 4, 4, 4, 1, 1, 1, 1, 4, 4>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::no_local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 4, - static_cast(gemm_batch_type_t::interleaved)>:: - template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, - _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, - batch_size, _dependencies); - } - // The following _M, _N ,and _K is used for SSD + Mobilenet v2 (TF version) - // We computed the best tile combination for each sizes -(4-March-2018) - // POWER_VR Rogue - if ((_M == 96 && _K == 16 && _N == 22500) || - (_M == 273 && _K == 576 && _N == 100) || - (_M == 384 && _K == 64 && _N == 361)) { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 96, true, false, false, 16, - Tile<4, 6, 12, 8>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 1, - static_cast( - gemm_batch_type_t::strided)>::template _select_gemm(sb_handle, _M, - _N, _K, _alpha, - _a, _lda, _stridea, _b, - _ldb, _strideb, _beta, _c, - _ldc, _stridec, - batch_size, - _dependencies); - } // The following _M, _N ,and _K is used for SSD + Mobilenet v2 (TF version) - // We computed the best tile combination for each sizes -(4-March-2018) - // POWER_VR Rogue - else if ((_M == 546 && _K == 512 && _N == 4) || - (_M == 24 && _K == 512 && _N == 4) || - (_M == 24 && _K == 256 && _N == 1) || - (_M == 64 && _K == 256 && _N == 4) || - (_M == 24 && _K == 256 && _N == 1) || - (_M == 128 && _K == 64 && _N == 1)) { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 64, false, false, false, - 128, Tile<1, 1, 8, 8>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 1, - static_cast(gemm_batch_type_t::strided)>:: - template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, - _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, - batch_size, _dependencies); - } // The following _M, _N ,and _K is used for SSD + Mobilenet v2 (TF version) - // We computed the best tile combination for each sizes -(4-March-2018) - // POWER_VR Rogue - else if ((_M == 546 && _K == 128 && _N == 1) || - (_M == 546 && _K == 256 && _N == 1)) { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 64, false, false, false, - 64, Tile<4, 4, 8, 8>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::no_local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 1, - static_cast(gemm_batch_type_t::strided)>:: - template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, - _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, - batch_size, _dependencies); - } // The following _M, _N ,and _K is used for SSD + Mobilenet v2 (TF version) - // We computed the best tile combination for each sizes -(4-March-2018) - // POWER_VR Rogue - else if ((_M == 576 && _K == 96 && _N == 361) || - (_M == 64 && _K == 384 && _N == 361) || - (_M == 160 && _K == 576 && _N == 100) || - (_M == 1280 && _K == 320 && _N == 100) || - (_M == 256 && _K == 1280 && _N == 100) || - (_M == 960 && _K == 160 && _N == 100) || - (_M == 192 && _K == 32 && _N == 1444) || - (_M > 64 && _K > 64 && _N > 64 && is_power_of_2(_M) && - is_power_of_2(_K) && is_power_of_2(_N))) { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 128, false, false, false, - 16, Tile<4, 8, 16, 8>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 1, - static_cast(gemm_batch_type_t::strided)>:: - template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, - _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, - batch_size, _dependencies); - } else { - return blas::Gemm_Launcher< - container_0_t, container_1_t, container_2_t, 64, false, false, false, - 32, Tile<4, 4, 8, 8>, _t_a, _t_b, s_a, s_b, - static_cast(gemm_memory_t::local), - static_cast(gemm_algorithm_t::standard), - static_cast(gemm_vectorization_t::full), is_beta_zero, 1, - static_cast(gemm_batch_type_t::strided)>:: - template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, - _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, - batch_size, _dependencies); - } -#endif -} -} // namespace backend -} // namespace gemm -} // namespace blas -#endif diff --git a/src/interface/extension_interface.hpp b/src/interface/extension_interface.hpp index 9613a4aeb..2ab86d132 100644 --- a/src/interface/extension_interface.hpp +++ b/src/interface/extension_interface.hpp @@ -348,13 +348,8 @@ typename sb_handle_t::event_t launch_type_based_reduction( sb_handle_t& sb_handle, input_t buffer_in, index_t ld, output_t buffer_out, index_t rows, index_t cols, const typename SB_Handle::event_t& dependencies) { -#ifdef POWER_VR - constexpr int ClSize = 32; - constexpr int WgSize = 64; -#else constexpr int ClSize = 64; constexpr int WgSize = 256; -#endif constexpr index_t reductions_per_thread = 64; using params_t = blas::ReductionParams::valid_thread( cl::sycl::nd_item<1> ndItem) const { return ((ndItem.get_global_id(0) < get_size())); } + template PORTBLAS_INLINE typename TupleOp::value_t TupleOp::eval( typename TupleOp::index_t i) { - return TupleOp::value_t(i, cl::sycl::fabs(rhs_.eval(i))); + return TupleOp::value_t(i, rhs_.eval(i)); } template diff --git a/src/operations/blas_operators.hpp b/src/operations/blas_operators.hpp index ae98f763b..86e6afc06 100644 --- a/src/operations/blas_operators.hpp +++ b/src/operations/blas_operators.hpp @@ -40,63 +40,6 @@ namespace blas { struct Operators {}; -/* StripASP. - * When using ComputeCpp CE, the Device Compiler uses Address Spaces - * to deal with the different global memories. - * However, this causes problem with std type traits, which see the - * types with address space qualifiers as different from the C++ - * standard types. - * - * This is StripASP function servers as a workaround that removes - * the address space for various types. - */ -template -struct StripASP { - typedef type_with_address_space_t type; -}; - -#if defined(__SYCL_DEVICE_ONLY__) && defined(__COMPUTECPP__) -#define GENERATE_STRIP_ASP(entry_type, pointer_type) \ - template <> \ - struct StripASP::pointer_t>::type> { \ - typedef entry_type type; \ - }; - -#define GENERATE_STRIP_ASP_LOCATION(data_t) \ - GENERATE_STRIP_ASP(data_t, constant_ptr) \ - GENERATE_STRIP_ASP(data_t, private_ptr) \ - GENERATE_STRIP_ASP(data_t, local_ptr) \ - GENERATE_STRIP_ASP(data_t, global_ptr) - -#define GENERATE_STRIP_ASP_TUPLE(index_t, data_t, pointer_type) \ - template <> \ - struct StripASP< \ - typename std::remove_pointer>::pointer_t>::type> { \ - typedef IndexValueTuple type; \ - }; -#define GENERATE_STRIP_ASP_NEST_TUPLE(index_t, data_t, pointer_type) \ - template <> \ - struct StripASP>>::pointer_t>::type> { \ - typedef IndexValueTuple> type; \ - }; - -#define INDEX_VALUE_STRIP_ASP_LOCATION(index_t, data_t) \ - GENERATE_STRIP_ASP_TUPLE(index_t, data_t, constant_ptr) \ - GENERATE_STRIP_ASP_TUPLE(index_t, data_t, private_ptr) \ - GENERATE_STRIP_ASP_TUPLE(index_t, data_t, local_ptr) \ - GENERATE_STRIP_ASP_TUPLE(index_t, data_t, global_ptr) - -#define NEST_INDEX_VALUE_STRIP_ASP_LOCATION(index_t, data_t) \ - GENERATE_STRIP_ASP_NEST_TUPLE(index_t, data_t, constant_ptr) \ - GENERATE_STRIP_ASP_NEST_TUPLE(index_t, data_t, private_ptr) \ - GENERATE_STRIP_ASP_NEST_TUPLE(index_t, data_t, local_ptr) \ - GENERATE_STRIP_ASP_NEST_TUPLE(index_t, data_t, global_ptr) -#endif // __SYCL_DEVICE_ONLY__ && __COMPUTECPP__ - /** * AbsoluteValue. * @@ -107,14 +50,12 @@ struct StripASP { * else. */ struct AbsoluteValue { - template - using stripped_t = typename StripASP::type; - #ifdef BLAS_ENABLE_HALF template - using is_floating_point = std::integral_constant< - bool, std::is_floating_point>::value || - std::is_same, cl::sycl::half>::value>; + using is_floating_point = + std::integral_constant::value || + std::is_same::value>; #else template using is_floating_point = std::is_floating_point; @@ -135,23 +76,6 @@ struct AbsoluteValue { } }; -#if defined(__SYCL_DEVICE_ONLY__) && defined(__COMPUTECPP__) -GENERATE_STRIP_ASP_LOCATION(double) -GENERATE_STRIP_ASP_LOCATION(float) -INDEX_VALUE_STRIP_ASP_LOCATION(int, float) -INDEX_VALUE_STRIP_ASP_LOCATION(long, float) -INDEX_VALUE_STRIP_ASP_LOCATION(long long, float) -INDEX_VALUE_STRIP_ASP_LOCATION(int, double) -INDEX_VALUE_STRIP_ASP_LOCATION(long, double) -INDEX_VALUE_STRIP_ASP_LOCATION(long long, double) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(int, float) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(long, float) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(long long, float) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(int, double) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(long, double) -NEST_INDEX_VALUE_STRIP_ASP_LOCATION(long long, double) -#endif - /*! Definitions of unary operators */ @@ -224,8 +148,7 @@ struct SquareOperator : public Operators { struct AddOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return (l + r); } @@ -235,16 +158,15 @@ struct AddOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct ProductOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return (l * r); } @@ -254,16 +176,15 @@ struct ProductOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct DivisionOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return (l / r); } @@ -273,16 +194,16 @@ struct DivisionOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct MeanOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval( - const element_t &accumulator, const element_t &val) { + static PORTBLAS_INLINE element_t eval(const element_t &accumulator, + const element_t &val) { return accumulator + val; } @@ -292,16 +213,15 @@ struct MeanOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &r) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &r) { return (l / static_cast(r)); } }; struct MaxOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return ((l > r) ? l : r); } @@ -311,16 +231,15 @@ struct MaxOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct MinOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return ((l < r) ? l : r); } @@ -330,16 +249,15 @@ struct MinOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct AbsoluteAddOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { return AbsoluteValue::eval(l) + AbsoluteValue::eval(r); } // namespace blas @@ -349,28 +267,23 @@ struct AbsoluteAddOperator : public Operators { } template - static PORTBLAS_INLINE typename StripASP::type get_final_value( - const element_t &l, const index_t &) { + static PORTBLAS_INLINE element_t get_final_value(const element_t &l, + const index_t &) { return l; } }; struct IMaxOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { - if (AbsoluteValue::eval( - static_cast::type>(l).get_value()) < - AbsoluteValue::eval( - static_cast::type>(r).get_value()) || - (AbsoluteValue::eval( - static_cast::type>(l).get_value()) == - AbsoluteValue::eval( - static_cast::type>(r).get_value()) && + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { + if (AbsoluteValue::eval(static_cast(l).get_value()) < + AbsoluteValue::eval(static_cast(r).get_value()) || + (AbsoluteValue::eval(static_cast(l).get_value()) == + AbsoluteValue::eval(static_cast(r).get_value()) && l.get_index() > r.get_index())) { - return static_cast::type>(r); + return static_cast(r); } else { - return static_cast::type>(l); + return static_cast(l); } } @@ -383,20 +296,15 @@ struct IMaxOperator : public Operators { struct IMinOperator : public Operators { template - static PORTBLAS_INLINE typename StripASP::type eval(const lhs_t &l, - const rhs_t &r) { - if (AbsoluteValue::eval( - static_cast::type>(l).get_value()) > - AbsoluteValue::eval( - static_cast::type>(r).get_value()) || - (AbsoluteValue::eval( - static_cast::type>(l).get_value()) == - AbsoluteValue::eval( - static_cast::type>(r).get_value()) && + static PORTBLAS_INLINE rhs_t eval(const lhs_t &l, const rhs_t &r) { + if (AbsoluteValue::eval(static_cast(l).get_value()) > + AbsoluteValue::eval(static_cast(r).get_value()) || + (AbsoluteValue::eval(static_cast(l).get_value()) == + AbsoluteValue::eval(static_cast(r).get_value()) && l.get_index() > r.get_index())) { - return static_cast::type>(r); + return static_cast(r); } else { - return static_cast::type>(l); + return static_cast(l); } } @@ -410,13 +318,11 @@ struct IMinOperator : public Operators { struct CollapseIndexTupleOperator : public Operators { template static PORTBLAS_INLINE - typename ResolveReturnType::type>::type + typename ResolveReturnType::type eval(const lhs_t &l, const rhs_t &r) { - return typename StripASP::type::value_t( - static_cast::type>(r).get_index() * l + - static_cast::type>(r).val.get_index(), - static_cast::type>(r).get_value()); + return typename rhs_t::value_t(static_cast(r).get_index() * l + + static_cast(r).val.get_index(), + static_cast(r).get_value()); } template diff --git a/src/sb_handle/kernel_constructor.hpp b/src/sb_handle/kernel_constructor.hpp index c63f20f0a..8f415ea73 100644 --- a/src/sb_handle/kernel_constructor.hpp +++ b/src/sb_handle/kernel_constructor.hpp @@ -80,42 +80,6 @@ struct LocalMemory { */ PORTBLAS_INLINE LocalMemory(size_t, cl::sycl::handler &) {} }; -/*! -@brief A struct for containing a local accessor if shared memory is enabled. -Specialised case for using_local_memory == subgroup, which contains a subgroup -local accessor. -@tparam value_t Value type of the accessor. -*/ -#ifdef __COMPUTECPP__ -template -struct LocalMemory { - /*! - @brief Constructor that creates a local accessor from a size and a SYCL - command group handler. - @param size Size in elements of the local accessor. - @param cgh SYCL command group handler. - */ - PORTBLAS_INLINE LocalMemory(size_t size, cl::sycl::handler &cgh) - : subgroupAcc(cl::sycl::range<1>(size), cgh) {} - - /*! - @brief Subscript operator that forwards on to the subgroup accessor subscript - operator. - @param id SYCL id. - @return Reference to an element of the subgroup accessor. - */ - PORTBLAS_INLINE value_t &operator[](cl::sycl::id<1> id) { - return subgroupAcc[id]; - } - - /*! - @brief subgroup accessor. - */ - cl::sycl::accessor - subgroupAcc; -}; -#endif /*! @brief Template struct for containing an eval function, which uses shared memory diff --git a/test/exprtest/CMakeLists.txt b/test/exprtest/CMakeLists.txt index 20cf0cc5e..a8e7e4c55 100644 --- a/test/exprtest/CMakeLists.txt +++ b/test/exprtest/CMakeLists.txt @@ -28,20 +28,13 @@ set(PORTBLAS_EXPRTEST ${CMAKE_CURRENT_SOURCE_DIR}) set(SYCL_EXPRTEST_SRCS ${PORTBLAS_EXPRTEST}/blas1_scal_asum_test.cpp ${PORTBLAS_EXPRTEST}/blas1_axpy_copy_test.cpp -) - -# Temporary disabling the following tests for Intel DPC++ as currently Intel compiler crashes while running the following tests -# https://github.com/intel/llvm/issues/7075 -if(is_computecpp) - list(APPEND SYCL_EXPRTEST_SRCS "${PORTBLAS_EXPRTEST}/collapse_nested_tuple.cpp") -endif() + ${PORTBLAS_EXPRTEST}/collapse_nested_tuple.cpp + ) foreach(blas_test ${SYCL_EXPRTEST_SRCS}) get_filename_component(test_exec ${blas_test} NAME_WE) add_executable(${test_exec} main.cpp ${blas_test}) - if(is_computecpp) - set_property(TARGET ${test_exec} PROPERTY CXX_STANDARD 14) - endif() + # -DTUNING_TARGET is needed when using portBLAS in header only mode. target_compile_definitions(${test_exec} PRIVATE -DBLAS_INDEX_T=${BLAS_TEST_INDEX_TYPE} -D${TUNING_TARGET}) target_link_libraries(${test_exec} PRIVATE gtest_main blas::blas portblas) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 1c90530c4..f473ab396 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -119,9 +119,6 @@ foreach(blas_test ${SYCL_UNITTEST_SRCS}) endif() get_filename_component(test_exec ${blas_test} NAME_WE) add_executable(${test_exec} main.cpp ${blas_test}) - if(is_computecpp) - set_property(TARGET ${test_exec} PROPERTY CXX_STANDARD 14) - endif() if(STRESS_TESTING) target_compile_definitions(${test_exec} PRIVATE STRESS_TESTING) endif() diff --git a/tools/auto_tuner/README.md b/tools/auto_tuner/README.md index 08155c52a..33ea8b58a 100644 --- a/tools/auto_tuner/README.md +++ b/tools/auto_tuner/README.md @@ -15,7 +15,7 @@ Building 3. Run `CMake` and `Ninja` from the build directory: ``` -$ cmake -GNinja ../ -DComputeCpp_DIR=/path/to/computecpp [-DTUNING_TARGET=supported backend] +$ cmake -GNinja ../ [-DTUNING_TARGET=supported backend] $ ninja ```