未验证 提交 c5f0293c 编写于 作者: Z Zhaolong Xing 提交者: GitHub

NV jetson(nano, tx2, xavier) inference compile support (#21393)

* add jeston compile support
test=develop

* refine the cmake
test=develop
上级 4bf115b4
......@@ -69,6 +69,7 @@ option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF)
################################ Internal Configurations #######################################
option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
......
......@@ -2,11 +2,20 @@ if(NOT WITH_GPU)
return()
endif()
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs7 "30 35 50 52")
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
if (WITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
set(paddle_known_gpu_archs7 "53")
set(paddle_known_gpu_archs8 "53 62")
set(paddle_known_gpu_archs9 "53 62")
set(paddle_known_gpu_archs10 "53 62 72")
else()
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs7 "30 35 50 52")
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
endif()
######################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
......
......@@ -187,7 +187,9 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-function # Warnings in Numpy Header.
-Wno-error=array-bounds # Warnings in Eigen::array
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
if (NOT WITH_NV_JETSON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif()
endif(NOT WIN32)
if (APPLE)
......
......@@ -149,26 +149,28 @@ cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce)
nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto)
py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
if(WITH_PYTHON)
py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto)
py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
#Generate an empty \
#__init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init)
if (NOT WIN32)
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init)
if (NOT WIN32)
add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else(NOT WIN32)
else(NOT WIN32)
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
COMMAND copy /Y *.py ${proto_dstpath}
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif(NOT WIN32)
endif(NOT WIN32)
endif()
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
......
......@@ -19,7 +19,7 @@ namespace framework {
std::shared_ptr<FILE> shell_fopen(const std::string& path,
const std::string& mode) {
#if defined _WIN32 || defined __APPLE__
#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM
return nullptr;
#else
if (shell_verbose()) {
......@@ -44,7 +44,7 @@ std::shared_ptr<FILE> shell_fopen(const std::string& path,
// The implementation is async signal safe
// Mostly copy from CPython code
static int close_open_fds_internal() {
#if defined _WIN32 || defined __APPLE__
#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM
return 0;
#else
struct linux_dirent {
......
......@@ -31,6 +31,11 @@
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/string_helper.h"
#if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \
defined(__ARM_NEON__)
#define PADDLE_ARM
#endif
namespace paddle {
namespace framework {
......
proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto simple_threadpool)
py_proto_compile(profiler_py_proto SRCS profiler.proto)
proto_library(error_codes_proto SRCS error_codes.proto)
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(profiler_py_proto profiler_py_proto_init)
if (NOT WIN32)
add_custom_command(TARGET profiler_py_proto POST_BUILD
if (WITH_PYTHON)
py_proto_compile(profiler_py_proto SRCS profiler.proto)
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(profiler_py_proto profiler_py_proto_init)
if (NOT WIN32)
add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else(NOT WIN32)
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
add_custom_command(TARGET profiler_py_proto POST_BUILD
else(NOT WIN32)
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMAND copy /Y *.py ${proto_dstpath}
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif(NOT WIN32)
endif(NOT WIN32)
endif()
cc_library(flags SRCS flags.cc DEPS gflags)
......
......@@ -38,24 +38,6 @@ limitations under the License. */
#include <cuda_fp16.h>
#endif
#if defined(__arm__) || defined(__aarch64__)
#define PADDLE_ARM
#endif
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define PADDLE_NEON
#include <arm_neon.h>
#endif
#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
(PADDLE_GNUC_VER >= 62 || PADDLE_CLANG_VER >= 37)
#define PADDLE_WITH_NATIVE_FP16
#endif
#ifndef PADDLE_ARM
#include <immintrin.h>
#endif // PADDLE_ARM
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册