From 1fd0ca82f938f8e5de0bdafcce94367813076c4a Mon Sep 17 00:00:00 2001
From: wopeizl
Date: Fri, 16 Aug 2019 14:07:26 +0800
Subject: [PATCH] merge from develop: add tensorrt support for win test=develop (#19172)

* merge from develop: add tensorrt support for win test=develop
---
 cmake/configure.cmake                         | 22 ++++++++++++-------
 cmake/inference_lib.cmake                     |  2 +-
 cmake/tensorrt.cmake                          | 18 +++++++++++++--
 .../inference/anakin/convert/op_converter.h   |  2 +-
 .../inference/api/demo_ci/CMakeLists.txt      | 12 ++++++++++
 .../inference/tensorrt/convert/op_converter.h |  6 ++---
 paddle/fluid/inference/tensorrt/engine.h      |  4 ++--
 .../tensorrt/plugin/prelu_op_plugin.cu        |  1 +
 .../tensorrt/plugin/trt_plugin_factory.h      |  2 +-
 .../fluid/platform/dynload/dynamic_loader.cc  |  2 ++
 paddle/fluid/platform/dynload/tensorrt.h      |  4 +++-
 python/setup.py.in                            |  5 +++++
 12 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 279f1eba3f5..5ee0ece0564 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -88,14 +88,20 @@ if(WITH_GPU)
   include_directories(${CUDA_TOOLKIT_INCLUDE})
 
   if(TENSORRT_FOUND)
-    if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
-      message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile")
-    endif()
-    if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
-      message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
-    endif()
-    if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
-      message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
+    if(WIN32)
+      if(${CUDA_VERSION_MAJOR} VERSION_LESS 9)
+        message(FATAL_ERROR "TensorRT needs CUDA >= 9.0 to compile on Windows")
+      endif()
+    else()
+      if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
+        message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile")
+      endif()
+      if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
+        message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
+      endif()
+      if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
+        message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
+      endif()
     endif()
     include_directories(${TENSORRT_INCLUDE_DIR})
   endif()
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index a854e27c42c..bd46bac0b36 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -218,7 +218,7 @@ endif ()
 
 if (TENSORRT_FOUND)
     copy(tensorrt_lib DEPS ${inference_deps}
-        SRCS ${TENSORRT_ROOT}/include/Nv*.h ${TENSORRT_ROOT}/lib/libnvinfer*
+        SRCS ${TENSORRT_ROOT}/include/Nv*.h ${TENSORRT_ROOT}/lib/*nvinfer*
         DSTS ${FLUID_INSTALL_DIR}/third_party/install/tensorrt/include
              ${FLUID_INSTALL_DIR}/third_party/install/tensorrt/lib)
 endif ()
diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
index 3bf12094e4c..fc97fcbf20a 100644
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -2,14 +2,28 @@ if(NOT WITH_GPU)
     return()
 endif()
 
-set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT")
+if(WIN32)
+  if("${TENSORRT_ROOT}" STREQUAL "")
+    message(WARNING "Please specify the TensorRT root path: TENSORRT_ROOT.")
+  endif()
+  string(REPLACE "\\" "/" TENSORRT_ROOT "${TENSORRT_ROOT}")
+  set(TR_INFER_LIB nvinfer.lib)
+  set(TR_INFER_RT nvinfer.dll)
+  set(TR_INFER_PLUGIN_RT nvinfer_plugin.dll)
+else()
+  set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT")
+  set(TR_INFER_LIB libnvinfer.a)
+  set(TR_INFER_RT libnvinfer.so)
+  set(TR_INFER_PLUGIN_RT libnvinfer_plugin.so)
+endif()
+
 find_path(TENSORRT_INCLUDE_DIR NvInfer.h
     PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/include
     $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include
     NO_DEFAULT_PATH
 )
 
-find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
+find_library(TENSORRT_LIBRARY NAMES ${TR_INFER_LIB} ${TR_INFER_RT}
     PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/lib
     $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib
     NO_DEFAULT_PATH
diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h
index 1058e744bca..9692f42779c 100644
--- a/paddle/fluid/inference/anakin/convert/op_converter.h
+++ b/paddle/fluid/inference/anakin/convert/op_converter.h
@@ -219,7 +219,7 @@ template class AnakinOpConverter<::anakin::saber::X86,
 #define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
   extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
   int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__ \
-      __attribute__((unused)) = \
+      UNUSED = \
           Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
 
 #if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
index c8306b8794e..74d92e5881c 100644
--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -141,6 +141,10 @@ if(WITH_GPU)
   endif()
   set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
 else()
+  if (USE_TENSORRT)
+    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+  endif()
   set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
   set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
   set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
@@ -150,6 +154,14 @@ endif()
 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
 target_link_libraries(${DEMO_NAME} ${DEPS})
 if(WIN32)
+  if(USE_TENSORRT)
+    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}
+        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}
+        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+    )
+  endif()
   if(WITH_MKL)
     add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index f89b0d7efe2..3a2deae3606 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -225,7 +225,7 @@ class OpConverter {
     return 0; \
   }
 
-#define USE_TRT_CONVERTER(op_type__)                                    \
-  extern int TouchConverterRegister_##op_type__();                      \
-  static int use_op_converter_trt_##op_type__ __attribute__((unused)) = \
+#define USE_TRT_CONVERTER(op_type__)                   \
+  extern int TouchConverterRegister_##op_type__();     \
+  static int use_op_converter_trt_##op_type__ UNUSED = \
       TouchConverterRegister_##op_type__();
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 80af463d274..d0a759d8503 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -216,8 +216,8 @@ class TensorRTEngine {
 // TensorRT has too many layers, so it is not wise to add member functions for
 // them, and a macro like this is more extensible when the underlying TensorRT
 // library adds support for new layers.
-#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
-  engine__->network()->add##layer__(ARGS);
+#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
+  engine__->network()->add##layer__(__VA_ARGS__);
 
 class TRTEngineManager {
  public:
diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
index b8a044fe99b..84f938eeb5f 100644
--- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
@@ -34,6 +34,7 @@ int PReluPlugin::initialize() {
   cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size());
   cudaMemcpy(p_gpu_weight_, weight_.data(), weight_.size() * sizeof(float),
              cudaMemcpyHostToDevice);
+  return 0;
 }
 
 nvinfer1::Dims PReluPlugin::getOutputDimensions(int index,
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
index 139c75595f9..ed825801fc4 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
@@ -68,7 +68,7 @@ class TrtPluginRegistrar {
 
 #define REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func) \
   static paddle::inference::tensorrt::plugin::TrtPluginRegistrar \
-      trt_plugin_registrar##ctr __attribute__((unused)) = \
+      trt_plugin_registrar##ctr UNUSED = \
           paddle::inference::tensorrt::plugin::TrtPluginRegistrar( \
               name, deserialize_func)
 
diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc
index f7ad7a4f3ae..c2d6abb346c 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.cc
+++ b/paddle/fluid/platform/dynload/dynamic_loader.cc
@@ -247,6 +247,8 @@ void* GetNCCLDsoHandle() {
 void* GetTensorRtDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.dylib");
+#elif defined(_WIN32)
+  return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "nvinfer.dll");
 #else
   return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.so");
 #endif
diff --git a/paddle/fluid/platform/dynload/tensorrt.h b/paddle/fluid/platform/dynload/tensorrt.h
index 751aa54b1ad..4c7ba0f054c 100644
--- a/paddle/fluid/platform/dynload/tensorrt.h
+++ b/paddle/fluid/platform/dynload/tensorrt.h
@@ -14,7 +14,9 @@ limitations under the License. */
 #pragma once
 
 #include <NvInfer.h>
+#if !defined(_WIN32)
 #include <dlfcn.h>
+#endif
 
 #include <mutex>  // NOLINT
 
@@ -34,7 +36,7 @@ extern void* tensorrt_dso_handle;
   struct DynLoad__##__name { \
     template <typename... Args> \
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
+      using tensorrt_func = decltype(&::__name); \
       std::call_once(tensorrt_dso_flag, []() { \
         tensorrt_dso_handle = \
             paddle::platform::dynload::GetTensorRtDsoHandle(); \
diff --git a/python/setup.py.in b/python/setup.py.in
index 82b5f6d8ea1..6828d593342 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -166,6 +166,11 @@ package_data['paddle.libs']= []
 package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
 shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
 
+if '${TENSORRT_FOUND}' == 'ON' and os.name == 'nt':
+    shutil.copy(os.path.join('${TENSORRT_ROOT}', 'lib', '${TR_INFER_RT}'), libs_path)
+    shutil.copy(os.path.join('${TENSORRT_ROOT}', 'lib', '${TR_INFER_PLUGIN_RT}'), libs_path)
+    package_data['paddle.libs'] += ['${TR_INFER_RT}', '${TR_INFER_PLUGIN_RT}']
+
 if '${WITH_MKL}' == 'ON':
     shutil.copy('${MKLML_SHARED_LIB}', libs_path)
     shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path)
--
GitLab
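
Usage sketch (illustrative, not part of the patch): with this change, Windows builds must pass TENSORRT_ROOT explicitly, since it has no default there; the string(REPLACE "\\" "/" ...) call normalizes backslashes, so either slash style should work. The install path below is hypothetical, the demo_ci variables USE_TENSORRT and TENSORRT_LIB_DIR are assumed to be supplied on the configure line as the references in the patch suggest, and other options the demo build normally requires are omitted.

    rem Configure Paddle with TensorRT enabled on Windows (hypothetical TensorRT location)
    cmake .. -DWITH_GPU=ON -DTENSORRT_ROOT="D:\TensorRT-5.1.5.0"

    rem Configure the inference demo in paddle/fluid/inference/api/demo_ci
    cmake .. -DWITH_GPU=ON -DUSE_TENSORRT=ON -DTENSORRT_LIB_DIR="D:\TensorRT-5.1.5.0\lib"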