diff --git a/CMakeLists.txt b/CMakeLists.txt index b39e48505c653e3163be2f49810dc3dc6ffb2c2f..17238a2e704acfe95131a8e8507d49964a133975 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,6 +107,7 @@ option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF) option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON) option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON) +option(WITH_ARM "Compile PaddlePaddle with arm support" OFF) # PY_VERSION if(NOT PY_VERSION) @@ -213,6 +214,12 @@ if(WITH_AMD_GPU) include(hip) endif(WITH_AMD_GPU) +if(WITH_ARM) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + add_definitions(-DPADDLE_WITH_ARM) +endif() + set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 162ea532faca8477e15f899a163caaf154a88098..5e47f268a36699b7e2310c5f5b2c20bcf6f18f1b 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -19,6 +19,9 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git) SET(CBLAS_TAG v0.3.7) +IF(WITH_ARM) + SET(CBLAS_TAG v0.2.18) +ENDIF() cache_third_party(extern_openblas REPOSITORY ${CBLAS_REPOSITORY} TAG ${CBLAS_TAG} diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 0c12846efe5dcf8cc7f433c1ce2d3cfa5458d881..e6a77c38ab5c0f5178669d9a4d18c571b638fb21 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -187,7 +187,7 @@ set(GPU_COMMON_FLAGS -Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=array-bounds # Warnings in Eigen::array ) -if (NOT WITH_NV_JETSON) +if (NOT WITH_NV_JETSON AND NOT WITH_ARM) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") endif() endif(NOT WIN32) diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index 675f5691aaa5429f6dd17b98c722fc7d3ed8d2c0..d166b350af30f7eb1118b4b507e80a515c1a42bb 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once +#if !defined(PADDLE_WITH_ARM) #include +#endif #include #include #include @@ -72,6 +74,8 @@ void call_gemm_batched(const framework::ExecutionContext& ctx, } } +#if !defined(PADDLE_WITH_ARM) + #define __m256x __m256 static const unsigned int AVX_STEP_SIZE = 8; @@ -94,6 +98,8 @@ static const unsigned int SSE_CUT_LEN_MASK = 1U; #define _mm_store_px _mm_storeu_ps #define _mm_load1_px _mm_load1_ps +#endif + template inline void axpy(const T* x, T* y, size_t len, const T alpha) { unsigned int jjj, lll; @@ -108,6 +114,8 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) { _mm256_add_px(_mm256_load_px(y + jjj), _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj)))); } +#elif defined(PADDLE_WITH_ARM) + PADDLE_THROW(platform::errors::Unimplemented("axpy is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); @@ -135,6 +143,8 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) { for (jjj = 0; jjj < lll; jjj += AVX_STEP_SIZE) { _mm256_store_px(y + jjj, _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj))); } +#elif defined(PADDLE_WITH_ARM) + PADDLE_THROW(platform::errors::Unimplemented("axpy_noadd is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index 6545e62927fe05b1d7a3426b3d91c5eb6aac0c2d..63760ada2b4d5226035b990cf5ecb7e1d21fbbe2 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -139,7 +139,7 @@ bool MayIUse(const cpu_isa_t cpu_isa) { if (cpu_isa == isa_any) { return true; } else { -#ifndef WITH_NV_JETSON +#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) int reg[4]; cpuid(reg, 0); int nIds = reg[0]; diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h index 66f05d51c0b6795e7509b3e80d9ffd3e4a28adfd..c071246c512500b47c1e131a44a71c6bb5377be2 100644 --- a/paddle/fluid/platform/cpu_info.h +++ b/paddle/fluid/platform/cpu_info.h @@ -40,7 +40,7 @@ limitations under the License. */ #ifdef _WIN32 #define cpuid(reg, x) __cpuidex(reg, x, 0) #else -#ifndef WITH_NV_JETSON +#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) #include inline void cpuid(int reg[4], int x) { __cpuid_count(x, 0, reg[0], reg[1], reg[2], reg[3]); diff --git a/python/setup.py.in b/python/setup.py.in index f1e9457c19db5c085eb24138c1450f468baa1ec4..4fbfab17d58e292089522909aa11c1dd7b55ce9f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -6,6 +6,7 @@ import shutil import sys import fnmatch import errno +import platform from contextlib import contextmanager from setuptools import Command @@ -310,8 +311,9 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' else: command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' - if os.system(command) != 0: - raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command)) + if platform.machine() != 'aarch64': + if os.system(command) != 0: + raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command)) ext_modules = [Extension('_foo', ['stub.cc'])] if os.name == 'nt':