Unverified commit 1fd0ca82, authored by wopeizl, committed by GitHub

merge from develop: add tensorrt support for win test=develop (#19172)

* merge from develop: add tensorrt support for win test=develop
Parent 5a86891f
@@ -88,6 +88,11 @@ if(WITH_GPU)
     include_directories(${CUDA_TOOLKIT_INCLUDE})
     if(TENSORRT_FOUND)
+      if(WIN32)
+        if(${CUDA_VERSION_MAJOR} VERSION_LESS 9)
+          message(FATAL_ERROR "TensorRT needs CUDA >= 9.0 to compile on Windows")
+        endif()
+      else()
       if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
         message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile")
       endif()
@@ -97,6 +102,7 @@ if(WITH_GPU)
       if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
         message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
       endif()
+      endif()
       include_directories(${TENSORRT_INCLUDE_DIR})
     endif()
     if(WITH_ANAKIN)
......
@@ -218,7 +218,7 @@ endif ()
 if (TENSORRT_FOUND)
   copy(tensorrt_lib DEPS ${inference_deps}
-    SRCS ${TENSORRT_ROOT}/include/Nv*.h ${TENSORRT_ROOT}/lib/libnvinfer*
+    SRCS ${TENSORRT_ROOT}/include/Nv*.h ${TENSORRT_ROOT}/lib/*nvinfer*
     DSTS ${FLUID_INSTALL_DIR}/third_party/install/tensorrt/include ${FLUID_INSTALL_DIR}/third_party/install/tensorrt/lib)
 endif ()
......
@@ -2,14 +2,28 @@ if(NOT WITH_GPU)
   return()
 endif()
-set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT")
+if(WIN32)
+  if("${TENSORRT_ROOT}" STREQUAL "")
+    message(WARNING "Please specify the TensorRT root path: TENSORRT_ROOT.")
+  endif()
+  string(REPLACE "\\" "/" TENSORRT_ROOT "${TENSORRT_ROOT}")
+  set(TR_INFER_LIB nvinfer.lib)
+  set(TR_INFER_RT nvinfer.dll)
+  set(TR_INFER_PLUGIN_RT nvinfer_plugin.dll)
+else()
+  set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT")
+  set(TR_INFER_LIB libnvinfer.a)
+  set(TR_INFER_RT libnvinfer.so)
+  set(TR_INFER_PLUGIN_RT libnvinfer_plugin.so)
+endif()
 find_path(TENSORRT_INCLUDE_DIR NvInfer.h
   PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/include
   $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include
   NO_DEFAULT_PATH
 )
-find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
+find_library(TENSORRT_LIBRARY NAMES ${TR_INFER_LIB} ${TR_INFER_RT}
   PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/lib
   $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib
   NO_DEFAULT_PATH
......
@@ -219,7 +219,7 @@ template class AnakinOpConverter<::anakin::saber::X86,
 #define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__)   \
   extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
   int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__   \
-      __attribute__((unused)) = \
+      UNUSED = \
       Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
 #if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
......
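This hunk, and the two similar ones further down, replace `__attribute__((unused))` with `UNUSED` because the attribute is a GCC/Clang extension that MSVC does not accept. Below is a minimal sketch of how such a portability macro is commonly defined; it is illustrative only and may not match Paddle's actual definition of `UNUSED`.

```cpp
// Illustrative sketch only; Paddle's real UNUSED macro may differ. The point
// is to hide the GCC-specific attribute behind a name that also compiles
// under MSVC.
#if defined(__GNUC__) || defined(__clang__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED  // MSVC and others: expand to nothing
#endif

// Usage mirrors the converter-registration macros in the diff: a static
// variable that exists only to trigger registration at load time.
static int use_converter_example UNUSED = 0;

int main() { return 0; }
```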
@@ -141,6 +141,10 @@ if(WITH_GPU)
   endif()
   set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
 else()
+  if (USE_TENSORRT)
+    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+  endif()
   set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX})
   set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX})
   set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
@@ -150,6 +154,14 @@ endif()
 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
 target_link_libraries(${DEMO_NAME} ${DEPS})
 if(WIN32)
+  if(USE_TENSORRT)
+    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}
+        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}
+        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+    )
+  endif()
   if(WITH_MKL)
     add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
......
@@ -227,5 +227,5 @@ class OpConverter {
 #define USE_TRT_CONVERTER(op_type__)               \
   extern int TouchConverterRegister_##op_type__(); \
-  static int use_op_converter_trt_##op_type__ __attribute__((unused)) = \
+  static int use_op_converter_trt_##op_type__ UNUSED = \
       TouchConverterRegister_##op_type__();
@@ -216,8 +216,8 @@ class TensorRTEngine {
 // TensorRT has too many layers, so that is not wise to add member functions for
 // them, and an macro like this is more extensible when underlying TensorRT
 // library add new layer supports.
-#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
-  engine__->network()->add##layer__(ARGS);
+#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
+  engine__->network()->add##layer__(__VA_ARGS__);
 class TRTEngineManager {
  public:
......
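The `TRT_ENGINE_ADD_LAYER` change swaps the GNU-specific named variadic form (`ARGS...`) for standard C++11 `__VA_ARGS__`, which MSVC also accepts. A self-contained sketch of the same pattern, using hypothetical names rather than Paddle code:

```cpp
#include <cstdio>

// GNU-only named variadic macro (what the old code used):
//   #define CALL(fn, ARGS...) fn(ARGS)
// Standard form accepted by GCC, Clang, and MSVC (what the diff switches to):
#define CALL(fn, ...) fn(__VA_ARGS__)

static int Sum3(int a, int b, int c) { return a + b + c; }

int main() {
  // CALL(Sum3, 1, 2, 3) expands to Sum3(1, 2, 3), forwarding the whole
  // argument pack just as TRT_ENGINE_ADD_LAYER forwards layer arguments.
  std::printf("%d\n", CALL(Sum3, 1, 2, 3));
  return 0;
}
```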
@@ -34,6 +34,7 @@ int PReluPlugin::initialize() {
   cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size());
   cudaMemcpy(p_gpu_weight_, weight_.data(), weight_.size() * sizeof(float),
              cudaMemcpyHostToDevice);
+  return 0;
 }
 nvinfer1::Dims PReluPlugin::getOutputDimensions(int index,
......
@@ -68,7 +68,7 @@ class TrtPluginRegistrar {
 #define REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func)    \
   static paddle::inference::tensorrt::plugin::TrtPluginRegistrar \
-      trt_plugin_registrar##ctr __attribute__((unused)) = \
+      trt_plugin_registrar##ctr UNUSED = \
       paddle::inference::tensorrt::plugin::TrtPluginRegistrar( \
           name, deserialize_func)
......
@@ -247,6 +247,8 @@ void* GetNCCLDsoHandle() {
 void* GetTensorRtDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.dylib");
+#elif defined(_WIN32)
+  return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "nvinfer.dll");
 #else
   return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.so");
 #endif
......
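The new `#elif defined(_WIN32)` branch looks up `nvinfer.dll` instead of `libnvinfer.so`. A minimal sketch of that platform split, assuming nothing about Paddle's `GetDsoHandleFromSearchPath` helper:

```cpp
#include <cstdio>

// Minimal sketch (not Paddle's implementation): Windows has no <dlfcn.h>, so
// LoadLibrary/GetProcAddress take the place of dlopen/dlsym, and the runtime
// library name changes accordingly.
#if defined(_WIN32)
#include <windows.h>
static void* OpenTensorRtRuntime() {
  return static_cast<void*>(::LoadLibraryA("nvinfer.dll"));
}
#else
#include <dlfcn.h>
static void* OpenTensorRtRuntime() {
  return ::dlopen("libnvinfer.so", RTLD_NOW | RTLD_GLOBAL);
}
#endif

int main() {
  void* handle = OpenTensorRtRuntime();
  std::printf("TensorRT runtime %s\n", handle ? "loaded" : "not found");
  return 0;
}
```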
@@ -14,7 +14,9 @@ limitations under the License. */
 #pragma once
 #include <NvInfer.h>
+#if !defined(_WIN32)
 #include <dlfcn.h>
+#endif
 #include <mutex>  // NOLINT
@@ -34,7 +36,7 @@ extern void* tensorrt_dso_handle;
   struct DynLoad__##__name { \
     template <typename... Args> \
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
+      using tensorrt_func = decltype(&::__name); \
       std::call_once(tensorrt_dso_flag, []() { \
         tensorrt_dso_handle = \
             paddle::platform::dynload::GetTensorRtDsoHandle(); \
......
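The `using tensorrt_func = decltype(&::__name);` line names the wrapped function's pointer type directly from its declaration, rather than rebuilding it from the call-site arguments as the old `decltype(__name(args...)) (*)(Args...)` form did. A standalone sketch with a hypothetical symbol (not an actual TensorRT entry point):

```cpp
#include <cstdio>

// Hypothetical stand-in for a TensorRT API function; only a declaration is
// needed, because decltype does not evaluate (or link against) its operand.
extern "C" int tensorrt_like_symbol(int device_id);

int main() {
  // decltype(&::name) yields the exact pointer-to-function type of the
  // declaration, here int (*)(int). In the dynload wrapper this is the type
  // the handle returned by dlsym()/GetProcAddress() is cast to.
  using symbol_ptr = decltype(&::tensorrt_like_symbol);
  symbol_ptr fn = nullptr;  // in real code: looked up from the loaded DSO
  std::printf("resolved symbol: %p\n", reinterpret_cast<void*>(fn));
  return 0;
}
```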
@@ -166,6 +166,11 @@ package_data['paddle.libs']= []
 package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
 shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
+if '${TENSORRT_FOUND}' == 'ON' and os.name == 'nt':
+    shutil.copy(os.path.join('${TENSORRT_ROOT}', 'lib', '${TR_INFER_RT}'), libs_path)
+    shutil.copy(os.path.join('${TENSORRT_ROOT}', 'lib', '${TR_INFER_PLUGIN_RT}'), libs_path)
+    package_data['paddle.libs'] += ['${TR_INFER_RT}', '${TR_INFER_PLUGIN_RT}']
 if '${WITH_MKL}' == 'ON':
     shutil.copy('${MKLML_SHARED_LIB}', libs_path)
     shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path)
......