diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 7670444ed1e021376fa44491973bb748cf611ecf..54aae0bdc249c6eacbd4bf6b5cc42cbba9f08784 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,10 +31,10 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.1")
 
 if (WITH_GPU)
-  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -92,8 +92,15 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+
+ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer.so)
+
+ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer_plugin.so)
+
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +108,4 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
 
 LIST(APPEND external_project_dependencies paddle)
 LIST(APPEND paddle_depend_libs
-  xxhash)
+  xxhash nvinfer nvinfer_plugin)
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 51cfb95a8d56d4261b9dab99df5216c5e6c79733..aedf951d6bb61002a946b3a4737491c3073ed728 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -553,7 +553,7 @@ class CloneDBReloadableInferEngine
 };
 
 template <typename FluidFamilyCore>
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
  public:
  FluidInferEngine() {}
  ~FluidInferEngine() {}
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 2fc6ae587ff26f5f05ff9332f08067ab49d06254..2a4da4b9b03e716b9e8148dbfd0200b887ee66e1 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
 
     paddle::AnalysisConfig analysis_config;
     analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
@@ -198,11 +198,50 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
 
-    if (params.enable_ir_optimization()) {
-      analysis_config.SwitchIrOptim(true);
-    } else {
-      analysis_config.SwitchIrOptim(false);
-    }
+    int min_seq_len = 1;
+    int max_seq_len = 512;
+    int opt_seq_len = 128;
+    int head_number = 12;
+    int batch = 50;
+
+    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
+    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
+    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
+
+    std::string input1_name = "src_text_a_ids";
+    std::string input2_name = "pos_text_a_ids";
+    std::string input3_name = "sent_text_a_ids";
+    std::string input4_name = "stack_0.tmp_0";
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {input1_name, min_in_shape},
+        {input2_name, min_in_shape},
+        {input3_name, min_in_shape},
+        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
+    };
+
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {input1_name, max_in_shape},
+        {input2_name, max_in_shape},
+        {input3_name, max_in_shape},
+        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> opt_input_shape = {
+        {input1_name, opt_in_shape},
+        {input2_name, opt_in_shape},
+        {input3_name, opt_in_shape},
+        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
+    };
+
+    analysis_config.EnableTensorRtEngine(
+        1 << 30,
+        batch,
+        5,
+        paddle::AnalysisConfig::Precision::kHalf,
+        true,
+        true);
+    analysis_config.SetTRTDynamicShapeInfo(
+        min_input_shape, max_input_shape, opt_input_shape);
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
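The fluid_gpu_engine.h hunk hard-codes a dynamic-shape profile for an ERNIE-style model (three id inputs plus the stack_0.tmp_0 attention-mask tensor) and switches the analysis predictor to a TensorRT FP16 subgraph engine. Below is a minimal standalone sketch of the same AnalysisConfig setup against the paddle_inference C++ API, useful for confirming that the TensorRT engine actually builds before the configuration is baked into the serving core. The input names and shape ranges mirror the diff; the model-directory argument, the helper name MakeTrtPredictor, and the zero-copy switch are illustrative assumptions, not part of the patch.

```cpp
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // paddle::AnalysisConfig, paddle::CreatePaddlePredictor

// Sketch only: mirrors the TensorRT dynamic-shape setup from the diff above.
std::unique_ptr<paddle::PaddlePredictor> MakeTrtPredictor(
    const std::string& model_dir, int gpu_id) {
  paddle::AnalysisConfig config;
  config.SetModel(model_dir);
  config.EnableUseGpu(1500, gpu_id);    // 1500 MB initial GPU memory pool
  config.SwitchSpecifyInputNames(true);
  config.SwitchUseFeedFetchOps(false);  // needed for the zero-copy run API

  const int batch = 50, head_number = 12;
  const int min_seq = 1, opt_seq = 128, max_seq = 512;
  std::map<std::string, std::vector<int>> min_shape, max_shape, opt_shape;
  const std::vector<std::string> id_inputs = {
      "src_text_a_ids", "pos_text_a_ids", "sent_text_a_ids"};
  for (const auto& name : id_inputs) {
    min_shape[name] = {batch, min_seq, 1};
    max_shape[name] = {batch, max_seq, 1};
    opt_shape[name] = {batch, opt_seq, 1};
  }
  // Attention mask produced by a stack op; its shape tracks seq_len twice.
  min_shape["stack_0.tmp_0"] = {batch, head_number, min_seq, min_seq};
  max_shape["stack_0.tmp_0"] = {batch, head_number, max_seq, max_seq};
  opt_shape["stack_0.tmp_0"] = {batch, head_number, opt_seq, opt_seq};

  config.EnableTensorRtEngine(1 << 30,  // TensorRT workspace size in bytes
                              batch,    // max batch size
                              5,        // min ops per offloaded subgraph
                              paddle::AnalysisConfig::Precision::kHalf,
                              true,     // use_static: cache serialized engines
                              true);    // use_calib_mode (no-op for FP16)
  config.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
  return paddle::CreatePaddlePredictor(config);
}
```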
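SetTRTDynamicShapeInfo only declares the [min, max] envelope the engine will specialize for; each request still has to arrive padded to one concrete shape inside that envelope. A usage sketch under the same assumptions (zero-copy tensors, int64 ids, float32 mask; the dtypes are guesses at the ERNIE-style inputs, which the patch itself does not pin down):

```cpp
#include <cstdint>
#include <vector>

#include "paddle_inference_api.h"

// Sketch only: feeds fixed-size padded inputs that fall inside the
// [min, max] dynamic-shape range declared when the predictor was built.
void RunOnce(paddle::PaddlePredictor* predictor) {
  const int batch = 50, seq_len = 128, head_number = 12;

  // Padded token / position / sentence ids (assumed int64, all zeros here).
  std::vector<int64_t> ids(static_cast<size_t>(batch) * seq_len, 0);
  for (const char* name :
       {"src_text_a_ids", "pos_text_a_ids", "sent_text_a_ids"}) {
    auto t = predictor->GetInputTensor(name);
    t->Reshape({batch, seq_len, 1});
    t->copy_from_cpu(ids.data());
  }

  // Attention mask (assumed float32), matching the stack_0.tmp_0 profile.
  std::vector<float> mask(
      static_cast<size_t>(batch) * head_number * seq_len * seq_len, 1.0f);
  auto m = predictor->GetInputTensor("stack_0.tmp_0");
  m->Reshape({batch, head_number, seq_len, seq_len});
  m->copy_from_cpu(mask.data());

  predictor->ZeroCopyRun();

  // Size the output buffer from the reported shape, then copy it out
  // (assumes a float32 output).
  auto out = predictor->GetOutputTensor(predictor->GetOutputNames()[0]);
  int numel = 1;
  for (int d : out->shape()) numel *= d;
  std::vector<float> result(numel);
  out->copy_to_cpu(result.data());
}
```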