未验证 提交 4c964abd 编写于 作者: W Wilber 提交者: GitHub

Support building on ARM. test=develop (#25212)

上级 f78e161e
...@@ -107,6 +107,7 @@ option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, ...@@ -107,6 +107,7 @@ option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak,
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF) option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON) option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON) option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
...@@ -213,6 +214,12 @@ if(WITH_AMD_GPU) ...@@ -213,6 +214,12 @@ if(WITH_AMD_GPU)
include(hip) include(hip)
endif(WITH_AMD_GPU) endif(WITH_AMD_GPU)
# ARM build configuration (enabled with -DWITH_ARM=ON):
#   * defines PADDLE_WITH_ARM for every target declared below this point, so
#     sources can test the macro in preprocessor conditionals;
#   * appends -fPIC to the global C and C++ compiler flags.
if(WITH_ARM)
  add_definitions(-DPADDLE_WITH_ARM)
  string(APPEND CMAKE_C_FLAGS " -fPIC")
  string(APPEND CMAKE_CXX_FLAGS " -fPIC")
endif()
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
...@@ -19,6 +19,9 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas) ...@@ -19,6 +19,9 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git) SET(CBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git)
SET(CBLAS_TAG v0.3.7) SET(CBLAS_TAG v0.3.7)
# ARM builds override the OpenBLAS tag chosen above with v0.2.18.
# NOTE(review): v0.2.18 is an older release than the default tag; the reason
# for pinning it is not stated here -- confirm before bumping.
if(WITH_ARM)
  set(CBLAS_TAG v0.2.18)
endif()
cache_third_party(extern_openblas cache_third_party(extern_openblas
REPOSITORY ${CBLAS_REPOSITORY} REPOSITORY ${CBLAS_REPOSITORY}
TAG ${CBLAS_TAG} TAG ${CBLAS_TAG}
......
...@@ -187,7 +187,7 @@ set(GPU_COMMON_FLAGS ...@@ -187,7 +187,7 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=unused-function # Warnings in Numpy Header.
-Wno-error=array-bounds # Warnings in Eigen::array -Wno-error=array-bounds # Warnings in Eigen::array
) )
if (NOT WITH_NV_JETSON) if (NOT WITH_NV_JETSON AND NOT WITH_ARM)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif() endif()
endif(NOT WIN32) endif(NOT WIN32)
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#if !defined(PADDLE_WITH_ARM)
#include <immintrin.h> #include <immintrin.h>
#endif
#include <cfloat> #include <cfloat>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
...@@ -72,6 +74,8 @@ void call_gemm_batched(const framework::ExecutionContext& ctx, ...@@ -72,6 +74,8 @@ void call_gemm_batched(const framework::ExecutionContext& ctx,
} }
} }
#if !defined(PADDLE_WITH_ARM)
#define __m256x __m256 #define __m256x __m256
static const unsigned int AVX_STEP_SIZE = 8; static const unsigned int AVX_STEP_SIZE = 8;
...@@ -94,6 +98,8 @@ static const unsigned int SSE_CUT_LEN_MASK = 1U; ...@@ -94,6 +98,8 @@ static const unsigned int SSE_CUT_LEN_MASK = 1U;
#define _mm_store_px _mm_storeu_ps #define _mm_store_px _mm_storeu_ps
#define _mm_load1_px _mm_load1_ps #define _mm_load1_px _mm_load1_ps
#endif
template <typename T> template <typename T>
inline void axpy(const T* x, T* y, size_t len, const T alpha) { inline void axpy(const T* x, T* y, size_t len, const T alpha) {
unsigned int jjj, lll; unsigned int jjj, lll;
...@@ -108,6 +114,8 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) { ...@@ -108,6 +114,8 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) {
_mm256_add_px(_mm256_load_px(y + jjj), _mm256_add_px(_mm256_load_px(y + jjj),
_mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj)))); _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj))));
} }
#elif defined(PADDLE_WITH_ARM)
PADDLE_THROW(platform::errors::Unimplemented("axpy is not supported"));
#else #else
lll = len & ~SSE_CUT_LEN_MASK; lll = len & ~SSE_CUT_LEN_MASK;
__m128x mm_alpha = _mm_load1_px(&alpha); __m128x mm_alpha = _mm_load1_px(&alpha);
...@@ -135,6 +143,8 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) { ...@@ -135,6 +143,8 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) {
for (jjj = 0; jjj < lll; jjj += AVX_STEP_SIZE) { for (jjj = 0; jjj < lll; jjj += AVX_STEP_SIZE) {
_mm256_store_px(y + jjj, _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj))); _mm256_store_px(y + jjj, _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj)));
} }
#elif defined(PADDLE_WITH_ARM)
PADDLE_THROW(platform::errors::Unimplemented("axpy_noadd is not supported"));
#else #else
lll = len & ~SSE_CUT_LEN_MASK; lll = len & ~SSE_CUT_LEN_MASK;
__m128x mm_alpha = _mm_load1_px(&alpha); __m128x mm_alpha = _mm_load1_px(&alpha);
......
...@@ -139,7 +139,7 @@ bool MayIUse(const cpu_isa_t cpu_isa) { ...@@ -139,7 +139,7 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
if (cpu_isa == isa_any) { if (cpu_isa == isa_any) {
return true; return true;
} else { } else {
#ifndef WITH_NV_JETSON #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM)
int reg[4]; int reg[4];
cpuid(reg, 0); cpuid(reg, 0);
int nIds = reg[0]; int nIds = reg[0];
......
...@@ -40,7 +40,7 @@ limitations under the License. */ ...@@ -40,7 +40,7 @@ limitations under the License. */
#ifdef _WIN32 #ifdef _WIN32
#define cpuid(reg, x) __cpuidex(reg, x, 0) #define cpuid(reg, x) __cpuidex(reg, x, 0)
#else #else
#ifndef WITH_NV_JETSON #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM)
#include <cpuid.h> #include <cpuid.h>
inline void cpuid(int reg[4], int x) { inline void cpuid(int reg[4], int x) {
__cpuid_count(x, 0, reg[0], reg[1], reg[2], reg[3]); __cpuid_count(x, 0, reg[0], reg[1], reg[2], reg[3]);
......
...@@ -6,6 +6,7 @@ import shutil ...@@ -6,6 +6,7 @@ import shutil
import sys import sys
import fnmatch import fnmatch
import errno import errno
import platform
from contextlib import contextmanager from contextlib import contextmanager
from setuptools import Command from setuptools import Command
...@@ -310,6 +311,7 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': ...@@ -310,6 +311,7 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'
else: else:
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'
if platform.machine() != 'aarch64':
if os.system(command) != 0: if os.system(command) != 0:
raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command)) raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册