提交 89a41580 编写于 作者: T tensor-tang

enable MKLDNN library and MKL small package

上级 875946ff
......@@ -37,6 +37,7 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ON)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
......@@ -94,6 +95,7 @@ include(external/glog) # download, build, install glog
include(external/gtest) # download, build, install gtest
include(external/protobuf) # download, build, install protobuf
include(external/python) # download, build, install python
include(external/mkldnn) # download, build, install mkldnn
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
......@@ -136,6 +138,11 @@ if(WITH_GPU)
endif(NOT WITH_DSO)
endif(WITH_GPU)
# With MKL-DNN enabled, add the MKL-DNN library and the library path held in
# MKL_LITE_LIB_IOMP to the list of external libraries, and report which
# MKL-DNN library path is in use.
if(WITH_MKLDNN)
  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY})
  list(APPEND EXTERNAL_LIBS ${MKL_LITE_LIB_IOMP})
  message(STATUS "MKLDNN_LIBRARY: ${MKLDNN_LIBRARY}")
endif()
if(USE_NNPACK)
include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
......
......@@ -16,22 +16,42 @@
set(CBLAS_FOUND OFF)
## Find MKL First.
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs")
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL")
set(MKL_INCLUDE_SEARCH_PATHS
${MKL_ROOT}/include
${INTEL_MKL_ROOT}/include)
set(MKL_LIB_SEARCH_PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64
${INTEL_MKL_ROOT}/lib
${INTEL_MKL_ROOT}/lib/intel64)
# Short-circuit: when both the MKL Lite include directory and library are
# already known, use MKL Lite as the CBLAS/LAPACK provider and stop processing
# this file, so none of the searches below run.
if(MKL_LITE_INC_DIR AND MKL_LITE_LIB)
  set(CBLAS_PROVIDER MKL_LITE)
  set(CBLAS_INC_DIR ${MKL_LITE_INC_DIR})
  set(CBLAS_LIBRARIES ${MKL_LITE_LIB})
  set(CBLAS_FOUND ON)

  # Both macros are made visible to the C/C++ sources.
  add_definitions(-DPADDLE_USE_MKL_LITE -DLAPACK_FOUND)

  message(STATUS "Found cblas and lapack in MKL Lite (include: ${MKL_LITE_INC_DIR}, library: ${CBLAS_LIBRARIES})")
  return()
endif()
find_path(MKL_INC_DIR mkl.h PATHS
${MKL_ROOT}/include)
${MKL_INCLUDE_SEARCH_PATHS})
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
${MKL_INCLUDE_SEARCH_PATHS})
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
${MKL_LIB_SEARCH_PATHS})
find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
${MKL_LIB_SEARCH_PATHS})
find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
${MKL_LIB_SEARCH_PATHS})
if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_FOUND ON)
......
......@@ -67,6 +67,12 @@ else()
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
# MKL-DNN builds compile C and C++ sources with the OpenMP flags and define
# PADDLE_USE_MKLDNN for the preprocessor.
# NOTE(review): OpenMP_C_FLAGS / OpenMP_CXX_FLAGS are not set in the visible
# code; presumably populated by an earlier find_package(OpenMP) -- confirm.
if(WITH_MKLDNN)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  add_definitions(-DPADDLE_USE_MKLDNN)
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Download, build and install MKL-DNN as an external project.
#
# Reads   : WITH_MKLDNN, THIRD_PARTY_PATH, EXTERNAL_PROJECT_LOG_ARGS,
#           CMAKE_C_FLAGS / CMAKE_CXX_FLAGS, OpenMP_C_FLAGS / OpenMP_CXX_FLAGS.
# Defines : MKLDNN_INCLUDE_DIR and MKLDNN_LIBRARY (cache, forced),
#           MKL_LITE_INC_DIR, MKL_LITE_LIB, MKL_LITE_LIB_IOMP,
#           the imported target `mkldnn`, and appends to CMAKE_INSTALL_RPATH
#           and external_project_dependencies.

# Test the variable by name: IF() dereferences it itself, so the condition
# stays well-formed even when WITH_MKLDNN is empty or undefined.
IF(NOT WITH_MKLDNN)
  return()
ENDIF()

INCLUDE(ExternalProject)

SET(MKLDNN_PROJECT        "extern_mkldnn")
SET(MKLDNN_SOURCES_DIR    ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_INSTALL_DIR    ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INCLUDE_DIR    "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)

# Pinned versions; bump both together when upgrading MKL-DNN.
# MKLDNN_MKL_VER must match the directory name under <SOURCE_DIR>/external
# that MKL_LITE_DIR below points at.
SET(MKLDNN_TAG            "v0.9")
SET(MKLDNN_MKL_VER        "mklml_lnx_2018.0.20170425")

IF(WIN32)
  # The two arguments are concatenated by MESSAGE(), hence the trailing space.
  MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet. "
                  "Force WITH_MKLDNN=OFF")
  SET(WITH_MKLDNN OFF)
  return()
ENDIF()

SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# LIST(APPEND) avoids an empty leading element when CMAKE_INSTALL_RPATH
# has not been set yet.
LIST(APPEND CMAKE_INSTALL_RPATH "${MKLDNN_INSTALL_DIR}/lib")

INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})

# MKL-DNN is compiled with the same flags as the main project plus OpenMP.
SET(MKLDNN_CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
SET(MKLDNN_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

ExternalProject_Add(
    ${MKLDNN_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY    "https://github.com/01org/mkl-dnn.git"
    GIT_TAG           "${MKLDNN_TAG}"
    PREFIX            ${MKLDNN_SOURCES_DIR}
    # Runs MKL-DNN's own helper script before configuring; the MKL_LITE_*
    # paths below assume it leaves the MKL package under <SOURCE_DIR>/external.
    PATCH_COMMAND     cd <SOURCE_DIR>/scripts && ./prepare_mkl.sh
    UPDATE_COMMAND    ""
    CMAKE_ARGS        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    CMAKE_ARGS        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    CMAKE_ARGS        -DCMAKE_CXX_FLAGS=${MKLDNN_CMAKE_CXX_FLAGS}
    CMAKE_ARGS        -DCMAKE_C_FLAGS=${MKLDNN_CMAKE_C_FLAGS}
    CMAKE_ARGS        -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
    CMAKE_ARGS        -DCMAKE_INSTALL_LIBDIR=${MKLDNN_INSTALL_DIR}/lib
    CMAKE_ARGS        -DCMAKE_BUILD_TYPE=Release
    CMAKE_CACHE_ARGS  -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
                      -DCMAKE_INSTALL_LIBDIR:PATH=${MKLDNN_INSTALL_DIR}/lib
                      -DCMAKE_BUILD_TYPE:STRING=Release
)

# Locations of the MKL package inside the MKL-DNN source tree.
SET(MKL_LITE_DIR ${MKLDNN_SOURCES_DIR}/src/${MKLDNN_PROJECT}/external/${MKLDNN_MKL_VER})
SET(MKL_LITE_INC_DIR ${MKL_LITE_DIR}/include)
SET(MKL_LITE_LIB ${MKL_LITE_DIR}/lib/libmklml_intel.so)
SET(MKL_LITE_LIB_IOMP ${MKL_LITE_DIR}/lib/libiomp5.so)
LIST(APPEND CMAKE_INSTALL_RPATH "${MKL_LITE_DIR}/lib")

# libmkldnn.so is a shared object, so the imported target must be SHARED;
# declaring it STATIC makes CMake apply static-library link semantics to it.
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY})
# Make anything that depends on `mkldnn` wait for the external build.
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})

LIST(APPEND external_project_dependencies mkldnn)
......@@ -202,7 +202,7 @@ double dotProduct<double>(const int n, const double* x, const double* y) {
return cblas_ddot(n, x, 1, y, 1);
}
#ifdef PADDLE_USE_MKL
#if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKL_LITE)
template <>
void vExp<float>(const int n, const float* a, float* r) {
......@@ -243,7 +243,55 @@ template <>
void vAdd<double>(const int n, const double* a, const double* b, double* r) {
vdAdd(n, a, b, r);
}
#else
// Generic vExp, placed in the #else branch of the preceding preprocessor
// check. The macro below is expected to define the functor binary::vExp<T>
// with the element expression b = std::exp(a).
// NOTE(review): the macro definition is not visible here -- confirm.
DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a));
// Applies binary::vExp<T> to the n-element input `a`, writing into `r`, by
// delegating to hl_cpu_apply_binary_op.
// `a` is const_cast because the helper is called with a non-const T*.
// NOTE(review): the trailing (1, n, n, n) arguments presumably describe a
// 1 x n matrix and its strides -- confirm against hl_cpu_apply_binary_op.
template <class T>
void vExp(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>(
binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
// Generic vLog. The macro below is expected to define the functor
// binary::vLog<T> with the element expression b = std::log(a).
// NOTE(review): the macro definition is not visible here -- confirm.
DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a));
// Applies binary::vLog<T> to the n-element input `a`, writing into `r`, by
// delegating to hl_cpu_apply_binary_op.
// `a` is const_cast because the helper is called with a non-const T*.
// NOTE(review): the trailing (1, n, n, n) arguments presumably describe a
// 1 x n matrix and its strides -- confirm against hl_cpu_apply_binary_op.
template <class T>
void vLog(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>(
binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
// Generic vPow. The macro below is expected to define the one-parameter
// functor binary::vPow<T> with the element expression b = std::pow(a, p),
// where p is the functor's stored parameter.
// NOTE(review): the macro definition is not visible here -- confirm.
DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p));
// Applies binary::vPow<T>, constructed with the exponent `b`, to the
// n-element input `a`, writing into `r`, via hl_cpu_apply_binary_op.
// `a` is const_cast because the helper is called with a non-const T*.
// NOTE(review): the trailing (1, n, n, n) arguments presumably describe a
// 1 x n matrix and its strides -- confirm against hl_cpu_apply_binary_op.
template <class T>
void vPow(const int n, const T* a, const T b, T* r) {
hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>(
binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n);
}
// Generic vAdd. The macro below is expected to define the functor
// ternary::vAdd<T> with the element expression c = a + b.
// NOTE(review): the macro definition is not visible here -- confirm.
DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b);
// Applies ternary::vAdd<T> to the n-element inputs `a` and `b`, writing into
// `r`, by delegating to hl_cpu_apply_ternary_op.
// Both inputs are const_cast because the helper is called with non-const T*.
// NOTE(review): the trailing (1, n, n, n, n) arguments presumably describe a
// 1 x n matrix and the strides of a, b and r -- confirm against
// hl_cpu_apply_ternary_op.
template <class T>
void vAdd(const int n, const T* a, const T* b, T* r) {
hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(),
const_cast<T*>(a),
const_cast<T*>(b),
r,
1,
n,
n,
n,
n);
}
// Explicit instantiations of the generic vExp / vLog / vPow / vAdd templates
// for float and double.
template void vExp(const int n, const float* a, float* r);
template void vExp(const int n, const double* a, double* r);
template void vLog(const int n, const float* a, float* r);
template void vLog(const int n, const double* a, double* r);
template void vPow(const int n, const float* a, const float b, float* r);
template void vPow(const int n, const double* a, const double b, double* r);
template void vAdd(const int n, const float* a, const float* b, float* r);
template void vAdd(const int n, const double* a, const double* b, double* r);
#endif
#ifdef PADDLE_USE_MKL
template <>
void vInvSqrt<float>(const int n, const float* a, float* r) {
vsInvSqrt(n, a, r);
......@@ -275,20 +323,6 @@ void vTanh<double>(const int n, const double* a, double* r) {
}
#else
DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a));
template <class T>
void vExp(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>(
binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a));
template <class T>
void vLog(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>(
binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a));
template <class T>
void vInvSqrt(const int n, const T* a, T* r) {
......@@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) {
binary::vTanh<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p));
template <class T>
void vPow(const int n, const T* a, const T b, T* r) {
hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>(
binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b);
template <class T>
void vAdd(const int n, const T* a, const T* b, T* r) {
hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(),
const_cast<T*>(a),
const_cast<T*>(b),
r,
1,
n,
n,
n,
n);
}
template void vExp(const int n, const float* a, float* r);
template void vExp(const int n, const double* a, double* r);
template void vLog(const int n, const float* a, float* r);
template void vLog(const int n, const double* a, double* r);
template void vInvSqrt(const int n, const double* a, double* r);
template void vInvSqrt(const int n, const float* a, float* r);
template void vLog1p(const int n, const float* a, float* r);
template void vLog1p(const int n, const double* a, double* r);
template void vTanh(const int n, const float* a, float* r);
template void vTanh(const int n, const double* a, double* r);
template void vPow(const int n, const float* a, const float b, float* r);
template void vPow(const int n, const double* a, const double b, double* r);
template void vAdd(const int n, const float* a, const float* b, float* r);
template void vAdd(const int n, const double* a, const double* b, double* r);
#endif
......
......@@ -15,6 +15,12 @@ limitations under the License. */
#ifndef MATHFUNCTIONS_H_
#define MATHFUNCTIONS_H_
#ifdef PADDLE_USE_MKL_LITE
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册