diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d1c3d4913bf4479aba70b01b81fc3e7200ccb47..c8e32f4c5321912805b505ea2bc7a347a528120d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,7 @@ option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(ON_INFER "Turn on inference optimization." OFF) ################################ Internal Configurations ####################################### option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF) +option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 09d713642a153c39a3125f5fc44890a2fedee923..0c2822557d4c5daaf55896a84468727aa7a6e4ab 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -2,11 +2,20 @@ if(NOT WITH_GPU) return() endif() -set(paddle_known_gpu_archs "30 35 50 52 60 61 70") -set(paddle_known_gpu_archs7 "30 35 50 52") -set(paddle_known_gpu_archs8 "30 35 50 52 60 61") -set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70") -set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75") + +if (WITH_NV_JETSON) + set(paddle_known_gpu_archs "53 62 72") + set(paddle_known_gpu_archs7 "53") + set(paddle_known_gpu_archs8 "53 62") + set(paddle_known_gpu_archs9 "53 62") + set(paddle_known_gpu_archs10 "53 62 72") +else() + set(paddle_known_gpu_archs "30 35 50 52 60 61 70") + set(paddle_known_gpu_archs7 "30 35 50 52") + set(paddle_known_gpu_archs8 "30 35 50 52 60 61") + set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70") + set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75") +endif() ###################################################################################### # A function for automatic detection of GPUs installed (if autodetection is enabled) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 1d40ad108fbfba69d86bfa16be6e46ddfbdc25da..44f9c5a3a4ccd8847c4c4e20e9178599f7f50245 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -33,7 +33,7 @@ IF(NOT ${CBLAS_FOUND}) IF (NOT WIN32) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") - SET(OPENBLAS_COMMIT "v0.2.20") + SET(OPENBLAS_COMMIT "v0.3.7") IF(APPLE) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") @@ -54,7 +54,6 @@ IF(NOT ${CBLAS_FOUND}) BUILD_IN_SOURCE 1 BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${COMMON_ARGS} ${OPTIONAL_ARGS} INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 NO_LAPACK=1 PREFIX= - && rm -r ${CBLAS_INSTALL_DIR}/lib/cmake ${CBLAS_INSTALL_DIR}/lib/pkgconfig UPDATE_COMMAND "" CONFIGURE_COMMAND "" ) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 2559fab37f27cb1c3cd38469512a45f0235cc701..1982812568a42bca47690b96c6a0208eca5f0d67 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -187,7 +187,9 @@ set(GPU_COMMON_FLAGS -Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=array-bounds # Warnings in Eigen::array ) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") +if (NOT WITH_NV_JETSON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") +endif() endif(NOT WIN32) if (APPLE) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index e0e5e18890f25573bc170d811b2b5b31e16e188f..e7099c543d885a9d5f1210f0c2c8fa9bee043afb 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -140,26 +140,28 @@ cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce) nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) -py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) -py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto) +if(WITH_PYTHON) + py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) + py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto) #Generate an empty \ #__init__.py to make framework_py_proto as a valid python module. -add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(framework_py_proto framework_py_proto_init) -if (NOT WIN32) - add_custom_command(TARGET framework_py_proto POST_BUILD + add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(framework_py_proto framework_py_proto_init) + if (NOT WIN32) + add_custom_command(TARGET framework_py_proto POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/ COMMENT "Copy generated python proto into directory paddle/fluid/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -else(NOT WIN32) - string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/") - add_custom_command(TARGET framework_py_proto POST_BUILD + else(NOT WIN32) + string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/") + add_custom_command(TARGET framework_py_proto POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto COMMAND copy /Y *.py ${proto_dstpath} COMMENT "Copy generated python proto into directory paddle/fluid/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif(NOT WIN32) + endif(NOT WIN32) +endif() cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) diff --git a/paddle/fluid/framework/io/shell.cc b/paddle/fluid/framework/io/shell.cc index ce0c3a767da3ca1331579e8f7d6a61ae3c71053d..53afa2ce71c2ae7aa5c9074265051ad16c50c0cf 100644 --- a/paddle/fluid/framework/io/shell.cc +++ b/paddle/fluid/framework/io/shell.cc @@ -19,7 +19,7 @@ namespace framework { std::shared_ptr shell_fopen(const std::string& path, const std::string& mode) { -#if defined _WIN32 || defined __APPLE__ +#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM return nullptr; #else if (shell_verbose()) { @@ -44,7 +44,7 @@ std::shared_ptr shell_fopen(const std::string& path, // The implementation is async signal safe // Mostly copy from CPython code static int close_open_fds_internal() { -#if defined _WIN32 || defined __APPLE__ +#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM return 0; #else struct linux_dirent { diff --git a/paddle/fluid/framework/io/shell.h b/paddle/fluid/framework/io/shell.h index 46fcc92bafa84e4c1b89e4603fe0db364572b73e..0aebe86c4c51a85bed436e2e550ecc47d08ce88c 100644 --- a/paddle/fluid/framework/io/shell.h +++ b/paddle/fluid/framework/io/shell.h @@ -31,6 +31,11 @@ #include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/string_helper.h" +#if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \ + defined(__ARM_NEON__) +#define PADDLE_ARM +#endif + namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 753c87253237b1f2b46d657605efed98d7302fe9..513f58e9beb81d697de41bba9d5072410b6a4a46 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -500,6 +500,8 @@ std::unique_ptr CreatePaddlePredictor< std::string flag = "--fraction_of_gpu_memory_to_use=" + std::to_string(fraction_of_gpu_memory); flags.push_back(flag); + // use auto growth strategy here. + flags.push_back("--allocator_strategy=auto_growth"); flags.push_back("--cudnn_deterministic=True"); VLOG(3) << "set flag: " << flag; framework::InitGflags(flags); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 2c94f939b74e68ea54498c2b8b84b297f757378d..12994ce422cfa12e15cd9911d5e169d433513529 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -1,24 +1,27 @@ proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto simple_threadpool) -py_proto_compile(profiler_py_proto SRCS profiler.proto) proto_library(error_codes_proto SRCS error_codes.proto) -add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(profiler_py_proto profiler_py_proto_init) -if (NOT WIN32) -add_custom_command(TARGET profiler_py_proto POST_BUILD +if (WITH_PYTHON) + py_proto_compile(profiler_py_proto SRCS profiler.proto) + add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(profiler_py_proto profiler_py_proto_init) + + if (NOT WIN32) + add_custom_command(TARGET profiler_py_proto POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -else(NOT WIN32) -string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/") -add_custom_command(TARGET profiler_py_proto POST_BUILD + else(NOT WIN32) + string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/") + add_custom_command(TARGET profiler_py_proto POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMAND copy /Y *.py ${proto_dstpath} COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif(NOT WIN32) + endif(NOT WIN32) +endif() cc_library(flags SRCS flags.cc DEPS gflags) diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h index c203f4e04a28452807a42bbdaf75e89977772a04..496eb78f20ef7bd25db07f68bb15202b2f7f2972 100644 --- a/paddle/fluid/platform/float16.h +++ b/paddle/fluid/platform/float16.h @@ -38,24 +38,6 @@ limitations under the License. */ #include #endif -#if defined(__arm__) || defined(__aarch64__) -#define PADDLE_ARM -#endif - -#if defined(__ARM_NEON) || defined(__ARM_NEON__) -#define PADDLE_NEON -#include -#endif - -#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \ - (PADDLE_GNUC_VER >= 62 || PADDLE_CLANG_VER >= 37) -#define PADDLE_WITH_NATIVE_FP16 -#endif - -#ifndef PADDLE_ARM -#include -#endif // PADDLE_ARM - #if !defined(_WIN32) #define PADDLE_ALIGN(x) __attribute__((aligned(x))) #else