Locate a LAPACK implementation alongside the CBLAS provider, and let the
utility sources build for Android / ARM NEON targets.

cmake/cblas.cmake gains find_lapack(). It is a macro, so its set() calls take
effect in the including scope. It searches CLAPACK_ROOT (seeded from the
CLAPACK_ROOT environment variable) for clapack.h, f2c.h and the f2c, cblaswr
and clapack libraries. When all five are found and LAPACK_FOUND is still OFF
it sets LAPACK_PROVIDER, LAPACK_INC_DIR, LAPACK_LIBRARIES and LAPACK_FOUND and
defines PADDLE_USE_CLAPACK. The MKL, ATLAS and OpenBLAS branches take LAPACK
headers from the provider's own include-dir variable when that variable is
true, and fall back to find_lapack() otherwise.

The MKL branch tests "if(MKL_LAPACK_INC_DIR)" rather than
"if(${MKL_LAPACK_INC_DIR})". The expanded form hands if() the directory path
itself; a path is not a boolean constant, so CMake looks it up as a variable
name, finds nothing, and the branch is never taken. The unexpanded form is
also what the ATLAS and OpenBLAS branches below use.

The C/C++ part adds a NEON vecType (float only), selects clapack headers when
PADDLE_USE_CLAPACK is defined, skips the CPUID probing when __ANDROID__ is
defined (reporting SIMD_NEON instead), and guards the SSE/SSE3 intrinsics in
Util.cpp with __SSE__ / __SSE3__.

NOTE(review): the header names on the bare "#include" lines, and the exact
indentation and blank-line placement of context lines, were not recoverable
from the text this patch was rebuilt from. Line breaks were placed to satisfy
every hunk's line counts; restore the header names and confirm whitespace
against the tree (or apply with whitespace-insensitive matching).

diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 235c95f017f2b6ef24195a0210ccafff36b6ed61..69b7fe61c2db89c708d1510b2fc88c919cbf2d58 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -14,14 +14,39 @@
 #
 
 set(CBLAS_FOUND OFF)
+set(LAPACK_FOUND OFF)
+
+macro(find_lapack)
+  ## Find clapack
+  set(CLAPACK_ROOT $ENV{CLAPACK_ROOT} CACHE PATH "Folder contain clapack")
+  find_path(CLAPACK_INC_DIR NAMES clapack.h PATHS
+    ${CLAPACK_ROOT}/include)
+  find_path(CLAPACK_F2C_INC_DIR NAMES f2c.h PATHS
+    ${CLAPACK_ROOT}/include)
+  find_library(CLAPACK_F2C_LIB NAMES f2c PATHS
+    ${CLAPACK_ROOT}/lib)
+  find_library(CLAPACK_CBLASWR_LIB NAMES cblaswr PATHS
+    ${CLAPACK_ROOT}/lib)
+  find_library(CLAPACK_CLAPACK_LIB NAMES clapack PATHS
+    ${CLAPACK_ROOT}/lib)
+  if(CLAPACK_INC_DIR AND CLAPACK_F2C_INC_DIR AND CLAPACK_F2C_LIB AND
+     CLAPACK_CBLASWR_LIB AND CLAPACK_CLAPACK_LIB AND NOT LAPACK_FOUND)
+    set(LAPACK_PROVIDER CLAPACK)
+    set(LAPACK_INC_DIR ${CLAPACK_INC_DIR})
+    set(LAPACK_LIBRARIES ${CLAPACK_F2C_LIB} ${CLAPACK_CBLASWR_LIB} ${CLAPACK_CLAPACK_LIB})
+    add_definitions(-DPADDLE_USE_CLAPACK)
+    message(STATUS "Found CLAPACK (include: ${LAPACK_INC_DIR}, library: ${LAPACK_LIBRARIES})")
+    set(LAPACK_FOUND ON)
+  endif()
+endmacro()
 
 ## Find MKL First.
 set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
 set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
 
-find_path(MKL_INCLUDE_DIR mkl.h PATHS
+find_path(MKL_INC_DIR mkl.h PATHS
   ${MKL_ROOT}/include)
-find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
+find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
   ${MKL_ROOT}/include)
 find_library(MKL_CORE_LIB NAMES mkl_core PATHS
   ${MKL_ROOT}/lib
@@ -34,15 +59,22 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
   ${MKL_ROOT}/lib/intel64)
 
 
-if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
+if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
   set(CBLAS_PROVIDER MKL)
-  set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
+  set(CBLAS_INC_DIR ${MKL_INC_DIR})
   set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
           ${MKL_SEQUENTIAL_LIB}
           ${MKL_CORE_LIB})
   add_definitions(-DPADDLE_USE_MKL)
   message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
+  if(MKL_LAPACK_INC_DIR)
+    set(LAPACK_INC_DIR ${MKL_LAPACK_INC_DIR})
+    message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
+    set(LAPACK_FOUND ON)
+  else()
+    find_lapack()
+  endif()
   return() # return file.
 endif()
 
@@ -68,13 +100,20 @@ find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
 find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
         PATHS ${ATLAS_LIB_SEARCH_PATHS})
 
-if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
+if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
   set(CBLAS_PROVIDER ATLAS)
-  set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
+  set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
   set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
   add_definitions(-DPADDLE_USE_ATLAS)
-  message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
+  message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
+  if(ATLAS_CLAPACK_INC_DIR)
+    set(LAPACK_INC_DIR ${ATLAS_CLAPACK_INC_DIR})
+    message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
+    set(LAPACK_FOUND ON)
+  else()
+    find_lapack()
+  endif()
   return()
 endif()
 
@@ -103,8 +142,15 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
   set(CBLAS_PROVIDER OPENBLAS)
   set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
   set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
-  message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
+  message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
+  if(OPENBLAS_LAPACKE_INC_DIR)
+    set(LAPACK_INC_DIR ${OPENBLAS_LAPACKE_INC_DIR})
+    message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
+    set(LAPACK_FOUND ON)
+  else()
+    find_lapack()
+  endif()
   return()
 endif()
 
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index 29d17691db9f4575bae4372c61a0e1964e163fc9..f18d8dab044bb203758522d41901b0027e32a62b 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -20,6 +20,7 @@ IF(NOT ${CBLAS_FOUND})
     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
     SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
     SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
+    SET(LAPACK_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "lapack (in openblas) include directory." FORCE)
 
     IF(WIN32)
         SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE)
@@ -64,3 +65,4 @@ IF(NOT ${CBLAS_FOUND})
 ENDIF(NOT ${CBLAS_FOUND})
 
 INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
+INCLUDE_DIRECTORIES(${LAPACK_INC_DIR})
diff --git a/paddle/cuda/include/hl_matrix_type.cuh b/paddle/cuda/include/hl_matrix_type.cuh
index 59213eee75f50d3c054ed8684a9a0e1053342a0a..f965ba966793f6f6eea0ad3606f60553fe904dda 100644
--- a/paddle/cuda/include/hl_matrix_type.cuh
+++ b/paddle/cuda/include/hl_matrix_type.cuh
@@ -17,13 +17,20 @@ limitations under the License. */
 
 #include "hl_base.h"
 
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__)
 #include
 #ifndef PADDLE_TYPE_DOUBLE
 typedef float4 vecType;
 #else
 typedef double2 vecType;
 #endif
+#elif (defined __ARM_NEON) || (defined __ARM_NEON__)
+#include
+#ifndef PADDLE_TYPE_DOUBLE
+typedef float32x4_t vecType;
+#else
+#error NEON instructions does not support double precision
+#endif
 #else
 #include
 #include
diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h
index c8559eefd8378450fc18c2ba821c65b39c8cc046..4d3f701b9a74b5488625e3ee94c6d00c197b3290 100644
--- a/paddle/math/MathFunctions.h
+++ b/paddle/math/MathFunctions.h
@@ -17,7 +17,11 @@ limitations under the License. */
 
 #ifdef PADDLE_USE_MKL
 #include
+#ifdef PADDLE_USE_CLAPACK
+#include
+#else
 #include
+#endif
 #else
 extern "C" {
 #include
@@ -27,9 +31,13 @@ extern "C" {
 #include
 }
 #else
+#ifdef PADDLE_USE_CLAPACK
+#include
+#else
 #include
 #endif
 #endif
+#endif
 
 #include
 
diff --git a/paddle/utils/CpuId.cpp b/paddle/utils/CpuId.cpp
index 8eefdd2980e7f56a836df6fd2ff8c31b81a55555..edd33c454122d95078e0fde2a2e9d68903951ee8 100644
--- a/paddle/utils/CpuId.cpp
+++ b/paddle/utils/CpuId.cpp
@@ -19,7 +19,7 @@ limitations under the License. */
 /// for MSVC
 #define CPUID(info, x) __cpuidex(info, x, 0)
 
-#else
+#elif !defined(__ANDROID__)
 
 #include
 
@@ -31,6 +31,7 @@ limitations under the License. */
 namespace paddle {
 
 SIMDFlags::SIMDFlags() {
+#if !defined(__ANDROID__)
   unsigned int cpuInfo[4];
   // CPUID: https://en.wikipedia.org/wiki/CPUID
   // clang-format off
@@ -51,6 +52,9 @@ SIMDFlags::SIMDFlags() {
   CPUID(cpuInfo, 0x80000001);
   simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
   // clang-fotmat on
+#else
+  simd_flags_ = SIMD_NEON;
+#endif
 }
 
 SIMDFlags const* SIMDFlags::instance() {
diff --git a/paddle/utils/CpuId.h b/paddle/utils/CpuId.h
index 0f3985cc7b2c018ede9bba9644d2d096561dccee..efad946ae7f1b7d2b4fa3b1acd96387d1d59d5e8 100644
--- a/paddle/utils/CpuId.h
+++ b/paddle/utils/CpuId.h
@@ -18,6 +18,7 @@ namespace paddle {
 // clang-format off
 enum simd_t {
   SIMD_NONE = 0, ///< None
+#if !defined(__ANDROID__)
   SIMD_SSE = 1 << 0, ///< SSE
   SIMD_SSE2 = 1 << 1, ///< SSE 2
   SIMD_SSE3 = 1 << 2, ///< SSE 3
@@ -29,6 +30,9 @@ enum simd_t {
   SIMD_AVX = 1 << 8, ///< AVX
   SIMD_AVX2 = 1 << 9, ///< AVX 2
   SIMD_AVX512 = 1 << 10, ///< AVX 512
+#else
+  SIMD_NEON = 1 << 0, ///< NEON
+#endif
 };
 // clang-format on
 
diff --git a/paddle/utils/Logging.cpp b/paddle/utils/Logging.cpp
index 5a1c6ecb2219f7983609c27f3215c7fc1e9e9ef2..ea96bad240ad81c4c29b7dab35b015549052e2bb 100644
--- a/paddle/utils/Logging.cpp
+++ b/paddle/utils/Logging.cpp
@@ -18,6 +18,7 @@ limitations under the License. */
  */
 
 #include "Logging.h"
+#include
 
 namespace paddle {
 
diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp
index dbab4ec43ca2fa691445131d2cb14f51721a2e4c..96302a45f36ee192db1bc04ac5787691088a6630 100644
--- a/paddle/utils/Util.cpp
+++ b/paddle/utils/Util.cpp
@@ -15,11 +15,16 @@ limitations under the License. */
 #include "Util.h"
 
 #include
-#include
 #include
 #include
 #include
+
+#ifdef __SSE__
 #include
+#endif
+#ifdef __SSE3__
+#include
+#endif
 
 #include
 #include
@@ -162,8 +167,12 @@ void initMain(int argc, char** argv) {
 
   installProfilerSwitch();
 
+#ifdef __SSE__
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+#endif
+#ifdef __SSE3__
   _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#endif
 
   if (FLAGS_seed == 0) {
     unsigned int t = time(NULL);