Unverified commit cddff20d, authored by Xin Pan, committed by GitHub

Merge pull request #13802 from NHZlX/add_trt_lib_release1.0

Cherry-pick from develop: add TRT lib to paddle_fluid
@@ -19,8 +19,18 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
 add_subdirectory(api)
+set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor)
+set(SHARED_INFERENCE_SRCS
+    io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc )
+if (WITH_GPU AND TENSORRT_FOUND)
+  set(STATIC_INFERENCE_APIS ${STATIC_INFERENCE_APIS} paddle_inference_tensorrt_subgraph_engine)
+  set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/api/api_tensorrt_subgraph_engine.cc)
+endif()
 # Create static library
-cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api paddle_inference_api analysis_predictor)
+cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} )
 if(NOT APPLE)
   # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
   set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
@@ -28,9 +38,7 @@ if(NOT APPLE)
 endif()
 # Create shared library
-cc_library(paddle_fluid_shared SHARED
-    SRCS io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
+cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
     DEPS ${fluid_modules} paddle_fluid_api)
 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
......
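With these rules, the TensorRT subgraph engine is compiled into both the static and shared paddle_fluid libraries whenever the build is configured with WITH_GPU and CMake finds TensorRT. A quick post-build sanity check one might run (a sketch only; the library path and the symbol pattern below are assumptions about a typical Linux build, not part of this patch):

    # look for TensorRT subgraph engine symbols in the shared inference library
    nm -D ${PADDLE_ROOT}/build/paddle/fluid/inference/libpaddle_fluid.so | grep -i tensorrt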
@@ -3,6 +3,7 @@ project(cpp_inference_demo CXX C)
 option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
 option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
 option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
+option(USE_TENSORRT "Compile demo with TensorRT." OFF)
 macro(safe_set_static_flag)
   foreach(flag_var
@@ -60,6 +61,13 @@ endif(NOT WIN32)
 include_directories("${PADDLE_LIB}/third_party/boost")
 include_directories("${PADDLE_LIB}/third_party/eigen3")
+if (NOT WIN32)
+  if (USE_TENSORRT AND WITH_GPU)
+    include_directories("${TENSORRT_INCLUDE_DIR}")
+    link_directories("${TENSORRT_LIB_DIR}")
+  endif()
+endif(NOT WIN32)
 if (NOT WIN32)
   link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
   link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
@@ -112,6 +120,10 @@ endif(NOT WIN32)
 if(WITH_GPU)
   if(NOT WIN32)
+    if (USE_TENSORRT)
+      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+    endif()
     set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
   else()
     set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
......
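For reference, the demo project can now also be configured by hand against a local TensorRT install; a minimal sketch, assuming the packaged inference library sits in ${PADDLE_ROOT}/build/fluid_install_dir and TensorRT is unpacked under /usr/local/TensorRT (adjust both paths for your machine):

    cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
    mkdir -p build && cd build
    cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
             -DDEMO_NAME=vis_demo \
             -DWITH_MKL=ON \
             -DWITH_GPU=ON \
             -DWITH_STATIC_LIB=ON \
             -DUSE_TENSORRT=ON \
             -DTENSORRT_INCLUDE_DIR=/usr/local/TensorRT/include \
             -DTENSORRT_LIB_DIR=/usr/local/TensorRT/lib
    make -j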
@@ -2,6 +2,12 @@ set -x
 PADDLE_ROOT=$1
 TURN_ON_MKL=$2 # use MKL or Openblas
 TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
+DATA_DIR=$4 # dataset
+TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include
+TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib
+cd `dirname $0`
+current_dir=`pwd`
 if [ $2 == ON ]; then
   # You can export yourself if move the install path
   MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib
@@ -13,6 +19,11 @@ else
   use_gpu_list='false'
 fi
+USE_TENSORRT=OFF
+if [ -d "$TENSORRT_INCLUDE_DIR" -a -d "$TENSORRT_LIB_DIR" ]; then
+  USE_TENSORRT=ON
+fi
 PREFIX=inference-vis-demos%2F
 URL_ROOT=http://paddlemodels.cdn.bcebos.com/${PREFIX}
@@ -29,15 +40,15 @@ function download() {
   fi
   cd ..
 }
-mkdir -p data
-cd data
+mkdir -p $DATA_DIR
+cd $DATA_DIR
 vis_demo_list='se_resnext50 ocr mobilenet'
 for vis_demo_name in $vis_demo_list; do
   download $vis_demo_name
 done
-cd ..
 # compile and test the demo
+cd $current_dir
 mkdir -p build
 cd build
@@ -73,9 +84,9 @@ for WITH_STATIC_LIB in ON OFF; do
   for use_gpu in $use_gpu_list; do
     for vis_demo_name in $vis_demo_list; do
       ./vis_demo \
-        --modeldir=../data/$vis_demo_name/model \
-        --data=../data/$vis_demo_name/data.txt \
-        --refer=../data/$vis_demo_name/result.txt \
+        --modeldir=$DATA_DIR/$vis_demo_name/model \
+        --data=$DATA_DIR/$vis_demo_name/data.txt \
+        --refer=$DATA_DIR/$vis_demo_name/result.txt \
         --use_gpu=$use_gpu
       if [ $? -ne 0 ]; then
         echo "vis demo $vis_demo_name runs fail."
@@ -83,5 +94,25 @@ for WITH_STATIC_LIB in ON OFF; do
       fi
     done
   done
+  # --------tensorrt mobilenet------
+  if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
+    rm -rf *
+    cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+      -DWITH_MKL=$TURN_ON_MKL \
+      -DDEMO_NAME=vis_demo \
+      -DWITH_GPU=$TEST_GPU_CPU \
+      -DWITH_STATIC_LIB=$WITH_STATIC_LIB \
+      -DUSE_TENSORRT=$USE_TENSORRT \
+      -DTENSORRT_INCLUDE_DIR=$TENSORRT_INCLUDE_DIR \
+      -DTENSORRT_LIB_DIR=$TENSORRT_LIB_DIR
+    make -j
+    ./vis_demo \
+      --modeldir=$DATA_DIR/mobilenet/model \
+      --data=$DATA_DIR/mobilenet/data.txt \
+      --refer=$DATA_DIR/mobilenet/result.txt \
+      --use_gpu=true \
+      --use_trt=true
+  fi
 done
 set +x
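run.sh now takes three extra positional arguments: the data directory plus the TensorRT include and lib directories. A hedged example of invoking it directly, with placeholder paths:

    # $1=PADDLE_ROOT  $2=use MKL  $3=test GPU  $4=data dir  $5=TRT include dir  $6=TRT lib dir
    ./run.sh /path/to/Paddle ON ON /path/to/demo_data \
        /usr/local/TensorRT/include /usr/local/TensorRT/lib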
@@ -33,6 +33,7 @@ DEFINE_string(
     "path of data; each line is a record, format is "
     "'<space splitted floats as data>\t<space splitted ints as shape'");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
+DEFINE_bool(use_trt, false, "Whether use trt.");
 namespace paddle {
 namespace demo {
@@ -100,20 +101,32 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
 /*
  * Use the native fluid engine to inference the demo.
  */
-void Main(bool use_gpu) {
-  NativeConfig config;
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.use_gpu = use_gpu;
-  config.device = 0;
-  if (FLAGS_use_gpu) {
+void Main(bool use_gpu, bool use_trt) {
+  std::unique_ptr<PaddlePredictor> predictor;
+  if (!use_trt) {
+    NativeConfig config;
+    config.param_file = FLAGS_modeldir + "/__params__";
+    config.prog_file = FLAGS_modeldir + "/__model__";
+    config.use_gpu = use_gpu;
+    config.device = 0;
+    if (FLAGS_use_gpu) {
+      config.fraction_of_gpu_memory = 0.1;  // set by yourself
+    }
+    VLOG(3) << "init predictor";
+    predictor =
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+  } else {
+    paddle::contrib::MixedRTConfig config;
+    config.param_file = FLAGS_modeldir + "/__params__";
+    config.prog_file = FLAGS_modeldir + "/__model__";
+    config.use_gpu = true;
+    config.device = 0;
+    config.max_batch_size = 1;
     config.fraction_of_gpu_memory = 0.1;  // set by yourself
+    predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config);
   }
-  VLOG(3) << "init predictor";
-  auto predictor =
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
   VLOG(3) << "begin to process data";
   // Just a single batch of data.
   std::string line;
@@ -131,7 +144,7 @@ void Main(bool use_gpu) {
   VLOG(3) << "run executor";
   std::vector<PaddleTensor> output;
-  predictor->Run({input}, &output);
+  predictor->Run({input}, &output, 1);
   VLOG(3) << "output.size " << output.size();
   auto& tensor = output.front();
@@ -146,9 +159,12 @@ void Main(bool use_gpu) {
 int main(int argc, char** argv) {
   google::ParseCommandLineFlags(&argc, &argv, true);
-  paddle::demo::Main(false /* use_gpu*/);
-  if (FLAGS_use_gpu) {
-    paddle::demo::Main(true /*use_gpu*/);
+  if (FLAGS_use_gpu && FLAGS_use_trt) {
+    paddle::demo::Main(true /*use_gpu*/, true);
+  } else if (FLAGS_use_gpu) {
+    paddle::demo::Main(true /*use_gpu*/, false);
+  } else {
+    paddle::demo::Main(false /*use_gpu*/, false /*use_tensorrt*/);
   }
   return 0;
 }
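After this change the demo supports three modes, selected by the two flags (--use_trt only takes effect together with --use_gpu). A usage sketch, assuming the model and data were downloaded to $DATA_DIR/mobilenet as run.sh does:

    MODEL="--modeldir=$DATA_DIR/mobilenet/model --data=$DATA_DIR/mobilenet/data.txt --refer=$DATA_DIR/mobilenet/result.txt"
    ./vis_demo $MODEL                                # CPU, native engine
    ./vis_demo $MODEL --use_gpu=true                 # GPU, native engine
    ./vis_demo $MODEL --use_gpu=true --use_trt=true  # GPU, TensorRT subgraph engine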
@@ -654,11 +654,21 @@ function gen_fluid_inference_lib() {
     if [[ ${WITH_C_API:-OFF} == "OFF" && ${WITH_INFERENCE:-ON} == "ON" ]] ; then
         cat <<EOF
     ========================================
-    Deploying fluid inference library ...
+    Generating fluid inference library ...
     ========================================
 EOF
         cmake .. -DWITH_DISTRIBUTE=OFF
         make -j `nproc` inference_lib_dist
+    fi
+}
+function tar_fluid_inference_lib() {
+    if [[ ${WITH_C_API:-OFF} == "OFF" && ${WITH_INFERENCE:-ON} == "ON" ]] ; then
+        cat <<EOF
+    ========================================
+    Tarring fluid inference library ...
+    ========================================
+EOF
         cd ${PADDLE_ROOT}/build
         cp -r fluid_install_dir fluid
         tar -czf fluid.tgz fluid
@@ -673,7 +683,7 @@ function test_fluid_inference_lib() {
     ========================================
 EOF
         cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
-        ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF}
+        ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib}
         ./clean.sh
     fi
 }
@@ -722,6 +732,7 @@ function main() {
       fluid_inference_lib)
         cmake_gen ${PYTHON_ABI:-""}
         gen_fluid_inference_lib
+        tar_fluid_inference_lib
         test_fluid_inference_lib
         ;;
       check_style)
......
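Taken together, the fluid_inference_lib CI target now generates, tars, and smoke-tests the inference library, and the demo run picks up TensorRT automatically when both directories exist. A sketch of driving it manually (the script path and the INFERENCE_DEMO_INSTALL_DIR value are assumptions; both TensorRT variables fall back to /usr/local/TensorRT/... when unset):

    # set where demo data should be downloaded and where TensorRT is installed
    export INFERENCE_DEMO_INSTALL_DIR=/tmp/inference_demo
    export TENSORRT_INCLUDE_DIR=/usr/local/TensorRT/include
    export TENSORRT_LIB_DIR=/usr/local/TensorRT/lib
    bash paddle/scripts/paddle_build.sh fluid_inference_lib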