From 7635d6993e1a5a7fea1b1b6fb7c9c3ee97ae4add Mon Sep 17 00:00:00 2001 From: yongqiangma Date: Wed, 20 Nov 2019 11:27:29 +0800 Subject: [PATCH] support build C++ cuda shared lib (#2401) * support build C++ cuda shared lib --- cmake/generic.cmake | 5 ++++- cmake/lite.cmake | 1 + lite/CMakeLists.txt | 3 +++ lite/api/CMakeLists.txt | 14 ++++++++++---- lite/core/context.h | 7 +++++++ lite/tools/build.sh | 6 +++--- 6 files changed, 28 insertions(+), 8 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 415eb451a9..225a3c19a1 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -490,6 +490,9 @@ function(nv_binary TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS}) + target_link_libraries(${TARGET_NAME} ${CUDNN_LIBRARY} ${CUBLAS_LIBRARIES}) + get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) + target_link_libraries(${TARGET_NAME} ${os_dependency_modules}) if(nv_binary_DEPS) target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS}) add_dependencies(${TARGET_NAME} ${nv_binary_DEPS}) @@ -507,7 +510,7 @@ function(nv_test TARGET_NAME) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} lite_gtest_main gtest -gflags glog ${os_dependency_modules} ${CUDNN_LIBRARY} ${CUBLAS_LIBRARIES} ) + gflags glog ${os_dependency_modules} ${CUDNN_LIBRARY} ${CUBLAS_LIBRARIES} ) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} lite_gtest_main gtest gflags glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) diff --git a/cmake/lite.cmake b/cmake/lite.cmake index 9cf8b12635..a095eea6d1 100644 --- a/cmake/lite.cmake +++ b/cmake/lite.cmake @@ -248,6 +248,7 @@ endfunction() set(arm_kernels CACHE INTERNAL "arm kernels") set(x86_kernels CACHE INTERNAL "x86 kernels") +set(cuda_kernels CACHE INTERNAL "cuda kernels") set(fpga_kernels CACHE INTERNAL "fpga kernels") set(npu_kernels CACHE INTERNAL "npu kernels") set(xpu_kernels CACHE INTERNAL "xpu kernels") diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index 60edfc357c..61f07583b2 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -117,6 +117,9 @@ if (LITE_WITH_X86) add_dependencies(publish_inference_x86_cxx_demos paddle_full_api_shared eigen3) endif() +if(LITE_WITH_CUDA) + add_dependencies(publish_inference paddle_full_api_shared) +endif(LITE_WITH_CUDA) if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) if (NOT LITE_ON_TINY_PUBLISH) # add cxx lib diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt index c79927ba10..63d53869ea 100644 --- a/lite/api/CMakeLists.txt +++ b/lite/api/CMakeLists.txt @@ -9,7 +9,7 @@ if (LITE_ON_TINY_PUBLISH) set(CMAKE_C_FLAGS_RELEASE "-Os -DNDEBUG") endif() set(light_lib_DEPS light_api paddle_api paddle_api_light optimizer) -if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux")) +if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux")) #full api dynamic library add_library(paddle_full_api_shared SHARED "") target_sources(paddle_full_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc) @@ -19,7 +19,9 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "and add_dependencies(paddle_full_api_shared xxhash) target_link_libraries(paddle_full_api_shared xxhash) endif() - + if(LITE_WITH_CUDA) + target_link_libraries(paddle_full_api_shared ${math_cuda} "-Wl,--whole-archive" ${cuda_kernels} "-Wl,--no-whole-archive") + endif(LITE_WITH_CUDA) #light api dynamic library lite_cc_library(paddle_light_api_shared MODULE SRCS light_api_shared.cc @@ -59,6 +61,7 @@ endif() message(STATUS "get ops ${ops}") message(STATUS "get X86 kernels ${x86_kernels}") +message(STATUS "get CUDA kernels ${cuda_kernels}") message(STATUS "get Host kernels ${host_kernels}") message(STATUS "get ARM kernels ${arm_kernels}") message(STATUS "get NPU kernels ${npu_kernels}") @@ -289,7 +292,8 @@ if(NOT IOS) XPU_DEPS ${xpu_kernels} CL_DEPS ${opencl_kernels} FPGA_DEPS ${fpga_kernels} - X86_DEPS ${x86_kernels}) + X86_DEPS ${x86_kernels} + CUDA_DEPS ${cuda_kernels}) lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils ${ops} ${host_kernels} ARM_DEPS ${arm_kernels} @@ -297,7 +301,9 @@ if(NOT IOS) XPU_DEPS ${xpu_kernels} CL_DEPS ${opencl_kernels} FPGA_DEPS ${fpga_kernels} - X86_DEPS ${x86_kernels}) + X86_DEPS ${x86_kernels} + CUDA_DEPS ${cuda_kernels}) + endif() #lite_cc_binary(cxx_api_bin SRCS cxx_api_bin.cc diff --git a/lite/core/context.h b/lite/core/context.h index eb25e7e1d9..545c6d2e88 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -207,6 +207,13 @@ class Context { ctx->cublas_fp32_ = cublas_fp32_; } + CUDAContext& operator=(const CUDAContext& context) { + this->Init( + context.device_id_, context.exec_stream_id_, context.io_stream_id_); + this->cublas_fp32_ = context.cublas_fp32_; + return *this; + } + const cudaStream_t& exec_stream() const { return exec_stream_; } void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; } diff --git a/lite/tools/build.sh b/lite/tools/build.sh index 90baac40e9..319f26ff82 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -237,10 +237,10 @@ function make_cuda { -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF \ -DWITH_TESTING=OFF \ -DLITE_WITH_ARM=OFF \ - -DLITE_WITH_PYTHON=ON \ + -DLITE_WITH_PYTHON=${BUILD_PYTHON} \ -DLITE_BUILD_EXTRA=ON - - make publish_inference_python_lib -j8 + + make publish_inference -j4 cd - } -- GitLab