From 89a4158038028c1a278ddec791e15bcff8307460 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 18 Jul 2017 16:15:36 +0800 Subject: [PATCH] enable MKLDNN library and MKL small package --- CMakeLists.txt | 7 +++ cmake/cblas.cmake | 40 +++++++++++---- cmake/configure.cmake | 6 +++ cmake/external/mkldnn.cmake | 78 +++++++++++++++++++++++++++++ paddle/math/MathFunctions.cpp | 93 ++++++++++++++++++----------------- paddle/math/MathFunctions.h | 6 +++ 6 files changed, 176 insertions(+), 54 deletions(-) create mode 100644 cmake/external/mkldnn.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index dcff6b54ca..5e664d1415 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -94,6 +95,7 @@ include(external/glog) # download, build, install glog include(external/gtest) # download, build, install gtest include(external/protobuf) # download, build, install protobuf include(external/python) # download, build, install python +include(external/mkldnn) # download, build, install mkldnn include(external/openblas) # download, build, install openblas include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc @@ -136,6 +138,11 @@ if(WITH_GPU) endif(NOT WITH_DSO) endif(WITH_GPU) +if(WITH_MKLDNN) + message(STATUS "MKLDNN_LIBRARY: ${MKLDNN_LIBRARY}") + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKL_LITE_LIB_IOMP}) +endif() + if(USE_NNPACK) include(external/nnpack) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 913f711aff..ee654e64bd 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -16,22 +16,42 @@ set(CBLAS_FOUND OFF) ## Find MKL First. -set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") -set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") +set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs") +set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL") + +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_ROOT}/include + ${INTEL_MKL_ROOT}/include) +set(MKL_LIB_SEARCH_PATHS + ${MKL_ROOT}/lib + ${MKL_ROOT}/lib/intel64 + ${INTEL_MKL_ROOT}/lib + ${INTEL_MKL_ROOT}/lib/intel64) + +if(MKL_LITE_INC_DIR AND MKL_LITE_LIB) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER MKL_LITE) + set(CBLAS_INC_DIR ${MKL_LITE_INC_DIR}) + set(CBLAS_LIBRARIES ${MKL_LITE_LIB}) + + add_definitions(-DPADDLE_USE_MKL_LITE) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found cblas and lapack in MKL Lite " + "(include: ${MKL_LITE_INC_DIR}, library: ${CBLAS_LIBRARIES})") + return() +endif() find_path(MKL_INC_DIR mkl.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_library(MKL_CORE_LIB NAMES mkl_core PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) set(CBLAS_FOUND ON) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 7afab5d534..8719197682 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -67,6 +67,12 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) +if(WITH_MKLDNN) + add_definitions(-DPADDLE_USE_MKLDNN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif(WITH_MKLDNN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000..834f5ae230 --- /dev/null +++ b/cmake/external/mkldnn.cmake @@ -0,0 +1,78 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLDNN}) + return() +ENDIF(NOT ${WITH_MKLDNN}) + +INCLUDE(ExternalProject) + +SET(MKLDNN_PROJECT "extern_mkldnn") +SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) +SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) + +# The following magic numbers should be updated regularly to keep latest version +SET(MKLDNN_TAG "v0.9") +SET(MKLDNN_MKL_VER "mklml_lnx_2018.0.20170425") + +IF(WIN32) + MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF) + return() +ELSE(WIN32) + SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) + MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") + SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + #SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS + SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR}) + +SET(MKLDNN_CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +SET(MKLDNN_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + +ExternalProject_Add( + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" + GIT_TAG "${MKLDNN_TAG}" + PREFIX ${MKLDNN_SOURCES_DIR} + PATCH_COMMAND cd /scripts && ./prepare_mkl.sh + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_LIBDIR=${MKLDNN_INSTALL_DIR}/lib + CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${MKLDNN_INSTALL_DIR}/lib + -DCMAKE_BUILD_TYPE:STRING=Release +) + +SET(MKL_LITE_DIR ${MKLDNN_SOURCES_DIR}/src/${MKLDNN_PROJECT}/external/${MKLDNN_MKL_VER}) +SET(MKL_LITE_INC_DIR ${MKL_LITE_DIR}/include) +SET(MKL_LITE_LIB ${MKL_LITE_DIR}/lib/libmklml_intel.so) +SET(MKL_LITE_LIB_IOMP ${MKL_LITE_DIR}/lib/libiomp5.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKL_LITE_DIR}/lib") + +ADD_LIBRARY(mkldnn STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) + +LIST(APPEND external_project_dependencies mkldnn) diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 7045562dd4..999b72cc15 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -202,7 +202,7 @@ double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } -#ifdef PADDLE_USE_MKL +#if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKL_LITE) template <> void vExp(const int n, const float* a, float* r) { @@ -243,7 +243,55 @@ template <> void vAdd(const int n, const double* a, const double* b, double* r) { vdAdd(n, a, b, r); } +#else + +DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); +template +void vExp(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vExp(), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); +template +void vLog(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vLog(), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); +template +void vPow(const int n, const T* a, const T b, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vPow(b), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); +template +void vAdd(const int n, const T* a, const T* b, T* r) { + hl_cpu_apply_ternary_op, 0, 0>(ternary::vAdd(), + const_cast(a), + const_cast(b), + r, + 1, + n, + n, + n, + n); +} + +template void vExp(const int n, const float* a, float* r); +template void vExp(const int n, const double* a, double* r); +template void vLog(const int n, const float* a, float* r); +template void vLog(const int n, const double* a, double* r); +template void vPow(const int n, const float* a, const float b, float* r); +template void vPow(const int n, const double* a, const double b, double* r); +template void vAdd(const int n, const float* a, const float* b, float* r); +template void vAdd(const int n, const double* a, const double* b, double* r); +#endif + +#ifdef PADDLE_USE_MKL template <> void vInvSqrt(const int n, const float* a, float* r) { vsInvSqrt(n, a, r); @@ -275,20 +323,6 @@ void vTanh(const int n, const double* a, double* r) { } #else -DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); -template -void vExp(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vExp(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); -template -void vLog(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vLog(), const_cast(a), r, 1, n, n, n); -} - DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a)); template void vInvSqrt(const int n, const T* a, T* r) { @@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) { binary::vTanh(), const_cast(a), r, 1, n, n, n); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); -template -void vPow(const int n, const T* a, const T b, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vPow(b), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); -template -void vAdd(const int n, const T* a, const T* b, T* r) { - hl_cpu_apply_ternary_op, 0, 0>(ternary::vAdd(), - const_cast(a), - const_cast(b), - r, - 1, - n, - n, - n, - n); -} - -template void vExp(const int n, const float* a, float* r); -template void vExp(const int n, const double* a, double* r); -template void vLog(const int n, const float* a, float* r); -template void vLog(const int n, const double* a, double* r); template void vInvSqrt(const int n, const double* a, double* r); template void vInvSqrt(const int n, const float* a, float* r); template void vLog1p(const int n, const float* a, float* r); template void vLog1p(const int n, const double* a, double* r); template void vTanh(const int n, const float* a, float* r); template void vTanh(const int n, const double* a, double* r); -template void vPow(const int n, const float* a, const float b, float* r); -template void vPow(const int n, const double* a, const double b, double* r); -template void vAdd(const int n, const float* a, const float* b, float* r); -template void vAdd(const int n, const double* a, const double* b, double* r); #endif diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 8ada0d34c6..799948cf08 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -15,6 +15,12 @@ limitations under the License. */ #ifndef MATHFUNCTIONS_H_ #define MATHFUNCTIONS_H_ +#ifdef PADDLE_USE_MKL_LITE +#include +#include +#include +#endif + #ifdef PADDLE_USE_MKL #include #include -- GitLab