提交 ba11f5c4 编写于 作者: L Liu Yiqun

Modify compiling flags to support neon and add the definition of PADDLE_USE_LAPACK.

上级 4430fc9e
...@@ -14,31 +14,6 @@ ...@@ -14,31 +14,6 @@
# #
set(CBLAS_FOUND OFF) set(CBLAS_FOUND OFF)
set(LAPACK_FOUND OFF)
# find_lapack()
#
# Looks for a standalone CLAPACK installation under CLAPACK_ROOT (a cache path
# seeded from the CLAPACK_ROOT environment variable).
#
# When both headers (clapack.h, f2c.h) and all three libraries (f2c, cblaswr,
# clapack) are located, and LAPACK_FOUND is not already true, the macro:
#   - sets LAPACK_PROVIDER to CLAPACK,
#   - sets LAPACK_INC_DIR and LAPACK_LIBRARIES,
#   - adds the PADDLE_USE_CLAPACK compile definition,
#   - sets LAPACK_FOUND to ON.
# Otherwise it leaves those variables untouched.
#
# This is a macro, not a function, so every set() below takes effect in the
# scope of the caller.
macro(find_lapack)
  set(CLAPACK_ROOT $ENV{CLAPACK_ROOT} CACHE PATH "Folder contain clapack")

  # Libraries shipped with CLAPACK.
  find_library(CLAPACK_F2C_LIB
    NAMES f2c
    PATHS ${CLAPACK_ROOT}/lib)
  find_library(CLAPACK_CBLASWR_LIB
    NAMES cblaswr
    PATHS ${CLAPACK_ROOT}/lib)
  find_library(CLAPACK_CLAPACK_LIB
    NAMES clapack
    PATHS ${CLAPACK_ROOT}/lib)

  # Headers shipped with CLAPACK.
  find_path(CLAPACK_INC_DIR
    NAMES clapack.h
    PATHS ${CLAPACK_ROOT}/include)
  find_path(CLAPACK_F2C_INC_DIR
    NAMES f2c.h
    PATHS ${CLAPACK_ROOT}/include)

  # Only claim LAPACK when nothing else has provided it and every piece of
  # CLAPACK was located.
  if(NOT LAPACK_FOUND
     AND CLAPACK_INC_DIR
     AND CLAPACK_F2C_INC_DIR
     AND CLAPACK_F2C_LIB
     AND CLAPACK_CBLASWR_LIB
     AND CLAPACK_CLAPACK_LIB)
    set(LAPACK_FOUND ON)
    set(LAPACK_PROVIDER CLAPACK)
    set(LAPACK_INC_DIR ${CLAPACK_INC_DIR})
    set(LAPACK_LIBRARIES
      ${CLAPACK_F2C_LIB}
      ${CLAPACK_CBLASWR_LIB}
      ${CLAPACK_CLAPACK_LIB})
    add_definitions(-DPADDLE_USE_CLAPACK)
    message(STATUS "Found CLAPACK (include: ${LAPACK_INC_DIR}, library: ${LAPACK_LIBRARIES})")
  endif()
endmacro()
## Find MKL First. ## Find MKL First.
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
...@@ -69,11 +44,8 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) ...@@ -69,11 +44,8 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(${MKL_LAPACK_INC_DIR}) if(${MKL_LAPACK_INC_DIR})
set(LAPACK_INC_DIR ${MKL_LAPACK_INC_DIR}) add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})") message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
set(LAPACK_FOUND ON)
else()
find_lapack()
endif() endif()
return() # return file. return() # return file.
endif() endif()
...@@ -108,11 +80,8 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND) ...@@ -108,11 +80,8 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(ATLAS_CLAPACK_INC_DIR) if(ATLAS_CLAPACK_INC_DIR)
set(LAPACK_INC_DIR ${ATLAS_CLAPACK_INC_DIR}) add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
set(LAPACK_FOUND ON)
else()
find_lapack()
endif() endif()
return() return()
endif() endif()
...@@ -145,11 +114,8 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) ...@@ -145,11 +114,8 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
if(OPENBLAS_LAPACKE_INC_DIR) if(OPENBLAS_LAPACKE_INC_DIR)
set(LAPACK_INC_DIR ${OPENBLAS_LAPACKE_INC_DIR}) add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
set(LAPACK_FOUND ON)
else()
find_lapack()
endif() endif()
return() return()
endif() endif()
......
...@@ -20,7 +20,6 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -20,7 +20,6 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
SET(LAPACK_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "lapack (in openblas) include directory." FORCE)
IF(WIN32) IF(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE)
...@@ -38,6 +37,8 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -38,6 +37,8 @@ IF(NOT ${CBLAS_FOUND})
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...") "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER) ENDIF(NOT CMAKE_Fortran_COMPILER)
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add( ExternalProject_Add(
openblas openblas
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -65,4 +66,3 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -65,4 +66,3 @@ IF(NOT ${CBLAS_FOUND})
ENDIF(NOT ${CBLAS_FOUND}) ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
INCLUDE_DIRECTORIES(${LAPACK_INC_DIR})
...@@ -22,7 +22,7 @@ SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include dire ...@@ -22,7 +22,7 @@ SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include dire
IF(WIN32) IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE) SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32) ELSE(WIN32)
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE) SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32) ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
......
...@@ -10,7 +10,7 @@ IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID ...@@ -10,7 +10,7 @@ IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID
set(SSE3_FLAG "-msse3") set(SSE3_FLAG "-msse3")
SET(AVX_FLAG "-mavx") SET(AVX_FLAG "-mavx")
SET(AVX2_FLAG "-mavx2") SET(AVX2_FLAG "-mavx2")
SET(NEON_FLAG "-mfloat-abi=softfp -mfpu=neon") SET(NEON_FLAG "-pie -fPIE -mfloat-abi=softfp -mfpu=neon")
ELSEIF(MSVC) ELSEIF(MSVC)
set(MMX_FLAG "/arch:MMX") set(MMX_FLAG "/arch:MMX")
set(SSE2_FLAG "/arch:SSE2") set(SSE2_FLAG "/arch:SSE2")
......
...@@ -85,11 +85,16 @@ int getrf<float>(const CBLAS_ORDER order, ...@@ -85,11 +85,16 @@ int getrf<float>(const CBLAS_ORDER order,
float* A, float* A,
const int lda, const int lda,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_sgetrf(order, M, N, A, lda, ipiv); return clapack_sgetrf(order, M, N, A, lda, ipiv);
#else #else
return LAPACKE_sgetrf(order, M, N, A, lda, ipiv); return LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
#endif #endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -99,11 +104,16 @@ int getrf<double>(const CBLAS_ORDER order, ...@@ -99,11 +104,16 @@ int getrf<double>(const CBLAS_ORDER order,
double* A, double* A,
const int lda, const int lda,
int* ipiv) { int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_dgetrf(order, M, N, A, lda, ipiv); return clapack_dgetrf(order, M, N, A, lda, ipiv);
#else #else
return LAPACKE_dgetrf(order, M, N, A, lda, ipiv); return LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
#endif #endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -112,11 +122,16 @@ int getri<float>(const CBLAS_ORDER order, ...@@ -112,11 +122,16 @@ int getri<float>(const CBLAS_ORDER order,
float* A, float* A,
const int lda, const int lda,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_sgetri(order, N, A, lda, ipiv); return clapack_sgetri(order, N, A, lda, ipiv);
#else #else
return LAPACKE_sgetri(order, N, A, lda, ipiv); return LAPACKE_sgetri(order, N, A, lda, ipiv);
#endif #endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
...@@ -125,11 +140,16 @@ int getri<double>(const CBLAS_ORDER order, ...@@ -125,11 +140,16 @@ int getri<double>(const CBLAS_ORDER order,
double* A, double* A,
const int lda, const int lda,
const int* ipiv) { const int* ipiv) {
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
return clapack_dgetri(order, N, A, lda, ipiv); return clapack_dgetri(order, N, A, lda, ipiv);
#else #else
return LAPACKE_dgetri(order, N, A, lda, ipiv); return LAPACKE_dgetri(order, N, A, lda, ipiv);
#endif #endif
#else
LOG(FATAL) << "Not implemented";
#endif
return 0;
} }
template <> template <>
......
...@@ -17,23 +17,19 @@ limitations under the License. */ ...@@ -17,23 +17,19 @@ limitations under the License. */
#ifdef PADDLE_USE_MKL #ifdef PADDLE_USE_MKL
#include <mkl.h> #include <mkl.h>
#ifdef PADDLE_USE_CLAPACK #ifdef PADDLE_USE_LAPACK
#include <clapack.h>
#else
#include <mkl_lapacke.h> #include <mkl_lapacke.h>
#endif #endif
#else #else
extern "C" { extern "C" {
#include <cblas.h> #include <cblas.h>
} }
#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS #ifdef PADDLE_USE_ATLAS
extern "C" { extern "C" {
#include <clapack.h> #include <clapack.h>
} }
#else #else
#ifdef PADDLE_USE_CLAPACK
#include <clapack.h>
#else
#include <lapacke.h> #include <lapacke.h>
#endif #endif
#endif #endif
......
...@@ -18,7 +18,6 @@ namespace paddle { ...@@ -18,7 +18,6 @@ namespace paddle {
// clang-format off // clang-format off
enum simd_t { enum simd_t {
SIMD_NONE = 0, ///< None SIMD_NONE = 0, ///< None
#if !defined(__ANDROID__)
SIMD_SSE = 1 << 0, ///< SSE SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2 SIMD_SSE2 = 1 << 1, ///< SSE 2
SIMD_SSE3 = 1 << 2, ///< SSE 3 SIMD_SSE3 = 1 << 2, ///< SSE 3
...@@ -30,9 +29,7 @@ enum simd_t { ...@@ -30,9 +29,7 @@ enum simd_t {
SIMD_AVX = 1 << 8, ///< AVX SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2 SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512 SIMD_AVX512 = 1 << 10, ///< AVX 512
#else SIMD_NEON = 1 << 11, ///< NEON
SIMD_NEON = 1 << 0, ///< NEON
#endif
}; };
// clang-format on // clang-format on
...@@ -99,6 +96,7 @@ private: ...@@ -99,6 +96,7 @@ private:
#define HAS_AVX HAS_SIMD(SIMD_AVX) #define HAS_AVX HAS_SIMD(SIMD_AVX)
#define HAS_AVX2 HAS_SIMD(SIMD_AVX2) #define HAS_AVX2 HAS_SIMD(SIMD_AVX2)
#define HAS_AVX512 HAS_SIMD(SIMD_AVX512) #define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
#define HAS_NEON HAS_SIMD(SIMD_NEON)
// clang-format on // clang-format on
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册