提交 717f755c 编写于 作者: L Liu Yiqun

Include arm_neon.h on arm platform.

上级 ccd3d0a4
......@@ -14,14 +14,39 @@
#
set(CBLAS_FOUND OFF)
set(LAPACK_FOUND OFF)
# find_lapack()
#
# Probes for a CLAPACK installation: the clapack.h and f2c.h headers plus the
# f2c, cblaswr and clapack libraries. CLAPACK_ROOT (a cache path seeded from
# the CLAPACK_ROOT environment variable) supplies the include/ and lib/
# directories that are searched.
#
# When every piece is located and LAPACK_FOUND is not already true, the macro
# sets LAPACK_PROVIDER, LAPACK_INC_DIR, LAPACK_LIBRARIES and LAPACK_FOUND and
# adds the PADDLE_USE_CLAPACK compile definition. Otherwise none of those are
# modified.
#
# This is a macro rather than a function, so the variables it sets land in
# the scope of whoever invokes it.
#
# Takes no arguments. Example:
#   find_lapack()
macro(find_lapack)
  set(CLAPACK_ROOT $ENV{CLAPACK_ROOT} CACHE PATH "Folder contain clapack")

  # Headers.
  find_path(CLAPACK_INC_DIR
    NAMES clapack.h
    PATHS ${CLAPACK_ROOT}/include)
  find_path(CLAPACK_F2C_INC_DIR
    NAMES f2c.h
    PATHS ${CLAPACK_ROOT}/include)

  # Libraries.
  find_library(CLAPACK_F2C_LIB
    NAMES f2c
    PATHS ${CLAPACK_ROOT}/lib)
  find_library(CLAPACK_CBLASWR_LIB
    NAMES cblaswr
    PATHS ${CLAPACK_ROOT}/lib)
  find_library(CLAPACK_CLAPACK_LIB
    NAMES clapack
    PATHS ${CLAPACK_ROOT}/lib)

  # Only publish the result if no other LAPACK provider was selected before.
  if(NOT LAPACK_FOUND)
    if(CLAPACK_INC_DIR
       AND CLAPACK_F2C_INC_DIR
       AND CLAPACK_F2C_LIB
       AND CLAPACK_CBLASWR_LIB
       AND CLAPACK_CLAPACK_LIB)
      set(LAPACK_PROVIDER CLAPACK)
      set(LAPACK_INC_DIR ${CLAPACK_INC_DIR})
      set(LAPACK_LIBRARIES
        ${CLAPACK_F2C_LIB}
        ${CLAPACK_CBLASWR_LIB}
        ${CLAPACK_CLAPACK_LIB})
      add_definitions(-DPADDLE_USE_CLAPACK)
      message(STATUS "Found CLAPACK (include: ${LAPACK_INC_DIR}, library: ${LAPACK_LIBRARIES})")
      set(LAPACK_FOUND ON)
    endif()
  endif()
endmacro()
## Find MKL First.
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
find_path(MKL_INC_DIR mkl.h PATHS
${MKL_ROOT}/include)
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
......@@ -34,15 +59,22 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib/intel64)
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_INC_DIR ${MKL_INC_DIR})
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
# Select the LAPACK headers to pair with MKL: use the directory holding
# mkl_lapacke.h when it was located, otherwise fall back to find_lapack().
#
# The variable is tested by name on purpose. Writing if(${MKL_LAPACK_INC_DIR})
# would expand it to the directory path first; if() then treats that path as
# the name of a variable, which is never defined, so the condition would
# always be false and the MKL header would never be used.
if(MKL_LAPACK_INC_DIR)
  set(LAPACK_INC_DIR ${MKL_LAPACK_INC_DIR})
  message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
  set(LAPACK_FOUND ON)
else()
  find_lapack()
endif()
return() # return file.
endif()
......@@ -68,13 +100,20 @@ find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
# Select the LAPACK headers to pair with ATLAS: when ATLAS_CLAPACK_INC_DIR
# holds a usable value it becomes LAPACK_INC_DIR; otherwise find_lapack() is
# asked to look for a LAPACK provider instead.
if(NOT ATLAS_CLAPACK_INC_DIR)
  find_lapack()
else()
  set(LAPACK_FOUND ON)
  set(LAPACK_INC_DIR ${ATLAS_CLAPACK_INC_DIR})
  message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
endif()
return()
endif()
......@@ -103,8 +142,15 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
# Select the LAPACK headers to pair with OpenBLAS: when
# OPENBLAS_LAPACKE_INC_DIR holds a usable value it becomes LAPACK_INC_DIR;
# otherwise find_lapack() is asked to look for a LAPACK provider instead.
if(NOT OPENBLAS_LAPACKE_INC_DIR)
  find_lapack()
else()
  set(LAPACK_FOUND ON)
  set(LAPACK_INC_DIR ${OPENBLAS_LAPACKE_INC_DIR})
  message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif()
return()
endif()
......
......@@ -20,6 +20,7 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
SET(LAPACK_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "lapack (in openblas) include directory." FORCE)
IF(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE)
......@@ -64,3 +65,4 @@ IF(NOT ${CBLAS_FOUND})
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
INCLUDE_DIRECTORIES(${LAPACK_INC_DIR})
......@@ -17,13 +17,20 @@ limitations under the License. */
#include "hl_base.h"
#ifdef __CUDA_ARCH__
#if defined(__CUDA_ARCH__)
#include <vector_types.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef float4 vecType;
#else
typedef double2 vecType;
#endif
#elif (defined __ARM_NEON) || (defined __ARM_NEON__)
#include <arm_neon.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef float32x4_t vecType;
#else
#error NEON instructions does not support double precision
#endif
#else
#include <mmintrin.h>
#include <xmmintrin.h>
......
......@@ -17,7 +17,11 @@ limitations under the License. */
#ifdef PADDLE_USE_MKL
#include <mkl.h>
#ifdef PADDLE_USE_CLAPACK
#include <clapack.h>
#else
#include <mkl_lapacke.h>
#endif
#else
extern "C" {
#include <cblas.h>
......@@ -27,9 +31,13 @@ extern "C" {
#include <clapack.h>
}
#else
#ifdef PADDLE_USE_CLAPACK
#include <clapack.h>
#else
#include <lapacke.h>
#endif
#endif
#endif
#include <cmath>
......
......@@ -19,7 +19,7 @@ limitations under the License. */
/// for MSVC
#define CPUID(info, x) __cpuidex(info, x, 0)
#else
#elif !defined(__ANDROID__)
#include <cpuid.h>
......@@ -31,6 +31,7 @@ limitations under the License. */
namespace paddle {
SIMDFlags::SIMDFlags() {
#if !defined(__ANDROID__)
unsigned int cpuInfo[4];
// CPUID: https://en.wikipedia.org/wiki/CPUID
// clang-format off
......@@ -51,6 +52,9 @@ SIMDFlags::SIMDFlags() {
CPUID(cpuInfo, 0x80000001);
simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
// clang-format on
#else
simd_flags_ = SIMD_NEON;
#endif
}
SIMDFlags const* SIMDFlags::instance() {
......
......@@ -18,6 +18,7 @@ namespace paddle {
// clang-format off
enum simd_t {
SIMD_NONE = 0, ///< None
#if !defined(__ANDROID__)
SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2
SIMD_SSE3 = 1 << 2, ///< SSE 3
......@@ -29,6 +30,9 @@ enum simd_t {
SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512
#else
SIMD_NEON = 1 << 0, ///< NEON
#endif
};
// clang-format on
......
......@@ -18,6 +18,7 @@ limitations under the License. */
*/
#include "Logging.h"
#include <cstdlib>
namespace paddle {
......
......@@ -15,11 +15,16 @@ limitations under the License. */
#include "Util.h"
#include <dirent.h>
#include <pmmintrin.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#endif
#include <fstream>
#include <mutex>
......@@ -162,8 +167,12 @@ void initMain(int argc, char** argv) {
installProfilerSwitch();
#ifdef __SSE__
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif
#ifdef __SSE3__
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
if (FLAGS_seed == 0) {
unsigned int t = time(NULL);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册