From 1b747de76e94f798436e5b6edaed37f480810889 Mon Sep 17 00:00:00 2001
From: Peihan
Date: Wed, 18 Aug 2021 17:20:51 +0800
Subject: [PATCH] add paddle detection model in pr-ci-inference (#34986)

---
 paddle/fluid/inference/api/analysis_config.cc |   2 +-
 paddle/fluid/inference/tests/infer_ut/run.sh  | 122 ++++++++------
 .../tests/infer_ut/test_ppyolo_mbv3.cc        | 156 ++++++++++++++++++
 .../tests/infer_ut/test_ppyolov2_r50vd.cc     | 155 +++++++++++++++++
 .../inference/tests/infer_ut/test_resnet50.cc |  29 ++++
 .../inference/tests/infer_ut/test_yolov3.cc   | 155 +++++++++++++++++
 6 files changed, 571 insertions(+), 48 deletions(-)
 create mode 100644 paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
 create mode 100644 paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
 create mode 100644 paddle/fluid/inference/tests/infer_ut/test_yolov3.cc

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index b515f7050e..bf71994913 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -740,7 +740,7 @@ std::string AnalysisConfig::Summary() {
   // cpu info
   os.InsertRow(
       {"cpu_math_thread", std::to_string(cpu_math_library_num_threads_)});
-  os.InsertRow({"enable_mkdlnn", use_mkldnn_ ? "true" : "false"});
+  os.InsertRow({"enable_mkldnn", use_mkldnn_ ? "true" : "false"});
   os.InsertRow(
       {"mkldnn_cache_capacity", std::to_string(mkldnn_cache_capacity_)});
   os.InsetDivider();
diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index 7d17bb647a..c1694c76a7 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -24,8 +24,14 @@ MSVC_STATIC_CRT=$6
 inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
 EXIT_CODE=0 # init default exit code
+export RED='\033[0;31m' # red color
+export NC='\033[0m' # no color
+export YELLOW='\033[33m' # yellow color
+
 cd `dirname $0`
 current_dir=`pwd`
+build_dir=${current_dir}/build
+log_dir=${current_dir}/log
 if [ $2 == ON ]; then
     # You can export yourself if move the install path
     MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
@@ -83,24 +89,42 @@ for model_name in $nlp_download_list; do
     download $url_prefix $model_name
 done
 
+det_download_list='yolov3 ppyolo_mbv3 ppyolov2_r50vd'
+for model_name in $det_download_list; do
+    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/detection"
+    download $url_prefix $model_name
+done
+
+function compile_test() {
+    mkdir -p ${build_dir}
+    cd ${build_dir}
+    TEST_NAME=$1
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
+        -DWITH_MKL=$TURN_ON_MKL \
+        -DDEMO_NAME=${TEST_NAME} \
+        -DWITH_GPU=$TEST_GPU_CPU \
+        -DWITH_STATIC_LIB=OFF \
+        -DUSE_TENSORRT=$USE_TENSORRT \
+        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
+        -DWITH_GTEST=ON
+    make -j$(nproc)
+    cd -
+}
+
+
 # compile and run test
 cd $current_dir
-mkdir -p build
-cd build
+mkdir -p ${build_dir}
+mkdir -p ${log_dir}
+cd ${build_dir}
 rm -rf *
 
-# ---------tensorrt resnet50 on linux---------
+# ---------tensorrt gpu tests on linux---------
 if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
     rm -rf *
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-        -DWITH_MKL=$TURN_ON_MKL \
-        -DDEMO_NAME=test_resnet50 \
-        -DWITH_GPU=$TEST_GPU_CPU \
-        -DWITH_STATIC_LIB=OFF \
-        -DUSE_TENSORRT=$USE_TENSORRT \
-        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-        -DWITH_GTEST=ON
-    make -j$(nproc)
+
+    printf "${YELLOW} start test_resnet50 ${NC} \n";
+    compile_test "test_resnet50"
     ./test_resnet50 \
         --modeldir=$DATA_DIR/resnet50/resnet50 \
         --gtest_output=xml:test_resnet50.xml
@@ -108,18 +132,9 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
         echo "test_resnet50 runs failed" >> ${current_dir}/build/test_summary.txt
         EXIT_CODE=1
     fi
-fi
-# ---------tensorrt det_mv3_db on linux---------
-if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-        -DWITH_MKL=$TURN_ON_MKL \
-        -DDEMO_NAME=test_det_mv3_db \
-        -DWITH_GPU=$TEST_GPU_CPU \
-        -DWITH_STATIC_LIB=OFF \
-        -DUSE_TENSORRT=$USE_TENSORRT \
-        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-        -DWITH_GTEST=ON
+    printf "${YELLOW} start test_det_mv3_db ${NC} \n";
+    compile_test "test_det_mv3_db"
     make -j$(nproc)
     ./test_det_mv3_db \
         --modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
         --gtest_output=xml:test_det_mv3_db.xml
@@ -128,19 +143,9 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
         echo "test_det_mv3_db runs failed" >> ${current_dir}/build/test_summary.txt
         EXIT_CODE=1
     fi
-fi
-# ---------tensorrt LeViT on linux---------
-if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-        -DWITH_MKL=$TURN_ON_MKL \
-        -DDEMO_NAME=test_LeViT \
-        -DWITH_GPU=$TEST_GPU_CPU \
-        -DWITH_STATIC_LIB=OFF \
-        -DUSE_TENSORRT=$USE_TENSORRT \
-        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-        -DWITH_GTEST=ON
-    make -j$(nproc)
+    printf "${YELLOW} start test_LeViT ${NC} \n";
+    compile_test "test_LeViT"
     ./test_LeViT \
         --modeldir=$DATA_DIR/LeViT/LeViT \
         --gtest_output=xml:test_LeViT.xml
@@ -148,19 +153,9 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
        echo "test_LeViT runs failed" >> ${current_dir}/build/test_summary.txt
        EXIT_CODE=1
     fi
-fi
-# ---------gpu ernie_text_cls on linux---------
-if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-        -DWITH_MKL=$TURN_ON_MKL \
-        -DDEMO_NAME=test_ernie_text_cls \
-        -DWITH_GPU=$TEST_GPU_CPU \
-        -DWITH_STATIC_LIB=OFF \
-        -DUSE_TENSORRT=$USE_TENSORRT \
-        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-        -DWITH_GTEST=ON
-    make -j$(nproc)
+    printf "${YELLOW} start test_ernie_text_cls ${NC} \n";
+    compile_test "test_ernie_text_cls"
     ./test_ernie_text_cls \
         --modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
         --gtest_output=xml:test_ernie_text_cls.xml
@@ -168,8 +163,41 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
         echo "test_ernie_text_cls runs failed" >> ${current_dir}/build/test_summary.txt
         EXIT_CODE=1
     fi
+
+    printf "${YELLOW} start test_yolov3 ${NC} \n";
+    compile_test "test_yolov3"
+    ./test_yolov3 \
+        --modeldir=$DATA_DIR/yolov3/yolov3 \
+        --gtest_output=xml:test_yolov3.xml
+    if [ $? -ne 0 ]; then
+        echo "test_yolov3 runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+
+    printf "${YELLOW} start test_ppyolo_mbv3 ${NC} \n";
+    compile_test "test_ppyolo_mbv3"
+    ./test_ppyolo_mbv3 \
+        --modeldir=$DATA_DIR/ppyolo_mbv3/ppyolo_mbv3 \
+        --gtest_output=xml:test_ppyolo_mbv3.xml
+    if [ $? -ne 0 ]; then
+        echo "test_ppyolo_mbv3 runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+
+    printf "${YELLOW} start test_ppyolov2_r50vd ${NC} \n";
+    compile_test "test_ppyolov2_r50vd"
+    ./test_ppyolov2_r50vd \
+        --modeldir=$DATA_DIR/ppyolov2_r50vd/ppyolov2_r50vd \
+        --gtest_output=xml:test_ppyolov2_r50vd.xml
+    if [ $? -ne 0 ]; then
+        echo "test_ppyolov2_r50vd runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+
+    cp ./*.xml ${log_dir};
 fi
+
 if [[ -f ${current_dir}/build/test_summary.txt ]];then
     echo "=====================test summary======================"
     cat ${current_dir}/build/test_summary.txt
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
new file mode 100644
index 0000000000..ae99cd8cff
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
@@ -0,0 +1,156 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 320;
+  int height = 320;
+  paddle::test::Record image, im_shape, scale_factor;
+  int input_num = batch_size * channel * width * height;
+  int shape_num = batch_size * 2;
+  std::vector<float> image_data(input_num, 1);
+  for (int i = 0; i < input_num; ++i) {
+    image_data[i] = i % 10 * 0.5;
+  }
+  std::vector<float> im_shape_data(shape_num, 1);
+  std::vector<float> scale_factor_data(shape_num, 1);
+
+  image.data = std::vector<float>(image_data.begin(), image_data.end());
+  image.shape = std::vector<int>{batch_size, channel, width, height};
+  image.type = paddle::PaddleDType::FLOAT32;
+
+  im_shape.data =
+      std::vector<float>(im_shape_data.begin(), im_shape_data.end());
+  im_shape.shape = std::vector<int>{batch_size, 2};
+  im_shape.type = paddle::PaddleDType::FLOAT32;
+
+  scale_factor.data =
+      std::vector<float>(scale_factor_data.begin(), scale_factor_data.end());
+  scale_factor.shape = std::vector<int>{batch_size, 2};
+  scale_factor.type = paddle::PaddleDType::FLOAT32;
+
+  std::map<std::string, paddle::test::Record> input_data_map;
+  input_data_map.insert({"image", image});
+  input_data_map.insert({"im_shape", im_shape});
+  input_data_map.insert({"scale_factor", scale_factor});
+
+  return input_data_map;
+}
+
+TEST(test_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
+  int thread_num = 4;
+  // init input data
+  auto input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.EnableUseGpu(100, 0);
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-2);
+    // TODO(OliverLPH): precision set to 1e-2 since input is fake, change to
+    // real input later
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+TEST(DISABLED_test_ppyolo_mbv3, multi_thread4_mkl_bz2) {
+  // TODO(OliverLPH): mkldnn multi thread will fail
+  int thread_num = 4;
+  // init input data
+  auto input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.DisableGpu();
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.DisableGpu();
+  config.EnableMKLDNN();
+  config.SetMkldnnCacheCapacity(10);
+  config.SetCpuMathLibraryNumThreads(10);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc b/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
new file mode 100644
index 0000000000..b2cb4ca322
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 640;
+  int height = 640;
+  paddle::test::Record image, im_shape, scale_factor;
+  int input_num = batch_size * channel * width * height;
+  int shape_num = batch_size * 2;
+  std::vector<float> image_data(input_num, 1);
+  for (int i = 0; i < input_num; ++i) {
+    image_data[i] = i % 10 * 0.5;
+  }
+  std::vector<float> im_shape_data(shape_num, 1);
+  std::vector<float> scale_factor_data(shape_num, 1);
+
+  image.data = std::vector<float>(image_data.begin(), image_data.end());
+  image.shape = std::vector<int>{batch_size, channel, width, height};
+  image.type = paddle::PaddleDType::FLOAT32;
+
+  im_shape.data =
+      std::vector<float>(im_shape_data.begin(), im_shape_data.end());
+  im_shape.shape = std::vector<int>{batch_size, 2};
+  im_shape.type = paddle::PaddleDType::FLOAT32;
+
+  scale_factor.data =
+      std::vector<float>(scale_factor_data.begin(), scale_factor_data.end());
+  scale_factor.shape = std::vector<int>{batch_size, 2};
+  scale_factor.type = paddle::PaddleDType::FLOAT32;
+
+  std::map<std::string, paddle::test::Record> input_data_map;
+  input_data_map.insert({"image", image});
+  input_data_map.insert({"im_shape", im_shape});
+  input_data_map.insert({"scale_factor", scale_factor});
+
+  return input_data_map;
+}
+
+TEST(test_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
+  int thread_num = 2;  // thread > 2 may OOM
+  // init input data
+  auto input_data_map = PrepareInput(1);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.EnableUseGpu(100, 0);
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 10, paddle_infer::PrecisionType::kFloat32, false, false);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    // CompareRecord(&truth_output_data, &infer_output_data, 1e-2);
+    // TODO(OliverLPH): disable comparison since precision is low
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+TEST(test_ppyolov2_r50vd, multi_thread2_mkl_bz2) {
+  int thread_num = 2;
+  // init input data
+  auto input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.DisableGpu();
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.DisableGpu();
+  config.EnableMKLDNN();
+  config.SetMkldnnCacheCapacity(10);
+  config.SetCpuMathLibraryNumThreads(10);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    // CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+    // TODO(OliverLPH): disable comparison since precision is low
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc b/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
index f497acc4b1..035bc3f34f 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
@@ -170,6 +170,35 @@ TEST(test_resnet50, multi_thread4_trt_fp32_bz2) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
+TEST(test_resnet50, trt_int8_bz2) {
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["inputs"] = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare inference config
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(1 << 20, 2, 3,
+                              paddle_infer::PrecisionType::kInt8, true, true);
+
+  // get first time prediction int8 results
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+
+  // get prediction int8 results, repeated 5 times
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data, 5);
+
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data);
+  std::cout << "finish test" << std::endl;
+}
+
 }  // namespace paddle_infer
 
 int main(int argc, char** argv) {
diff --git a/paddle/fluid/inference/tests/infer_ut/test_yolov3.cc b/paddle/fluid/inference/tests/infer_ut/test_yolov3.cc
new file mode 100644
index 0000000000..845bcbc5c5
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_yolov3.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 608;
+  int height = 608;
+  paddle::test::Record image, im_shape, scale_factor;
+  int input_num = batch_size * channel * width * height;
+  int shape_num = batch_size * 2;
+  std::vector<float> image_data(input_num, 1);
+  for (int i = 0; i < input_num; ++i) {
+    image_data[i] = i % 10 * 0.5;
+  }
+  std::vector<float> im_shape_data(shape_num, 1);
+  std::vector<float> scale_factor_data(shape_num, 1);
+
+  image.data = std::vector<float>(image_data.begin(), image_data.end());
+  image.shape = std::vector<int>{batch_size, channel, width, height};
+  image.type = paddle::PaddleDType::FLOAT32;
+
+  im_shape.data =
+      std::vector<float>(im_shape_data.begin(), im_shape_data.end());
+  im_shape.shape = std::vector<int>{batch_size, 2};
+  im_shape.type = paddle::PaddleDType::FLOAT32;
+
+  scale_factor.data =
+      std::vector<float>(scale_factor_data.begin(), scale_factor_data.end());
+  scale_factor.shape = std::vector<int>{batch_size, 2};
+  scale_factor.type = paddle::PaddleDType::FLOAT32;
+
+  std::map<std::string, paddle::test::Record> input_data_map;
+  input_data_map.insert({"image", image});
+  input_data_map.insert({"im_shape", im_shape});
+  input_data_map.insert({"scale_factor", scale_factor});
+
+  return input_data_map;
+}
+
+TEST(test_yolov3, multi_thread3_trt_fp32_bz2) {
+  int thread_num = 3;
+  // init input data
+  auto input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.EnableUseGpu(100, 0);
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-2);
+    // TODO(OliverLPH): precision set to 1e-2 since input is fake, change to
+    // real input later
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+TEST(test_yolov3, multi_thread4_mkl_bz2) {
+  int thread_num = 4;
+  // init input data
+  auto input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                        FLAGS_modeldir + "/model.pdiparams");
+  config_no_ir.DisableGpu();
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/model.pdmodel",
+                  FLAGS_modeldir + "/model.pdiparams");
+  config.DisableGpu();
+  config.EnableMKLDNN();
+  config.SetMkldnnCacheCapacity(10);
+  config.SetCpuMathLibraryNumThreads(10);
+  LOG(INFO) << config.Summary();
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
-- 
GitLab
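
Note: as a minimal sketch of how one of the new detection tests can be exercised by hand (assuming run.sh has already built the test binary and downloaded the detection models into $DATA_DIR; the paths below mirror the invocations added to run.sh and are not part of the patch itself):

    # run the yolov3 unit test directly from the infer_ut build directory
    cd paddle/fluid/inference/tests/infer_ut/build
    ./test_yolov3 \
        --modeldir=$DATA_DIR/yolov3/yolov3 \
        --gtest_output=xml:test_yolov3.xml

The same pattern applies to test_ppyolo_mbv3 and test_ppyolov2_r50vd with their respective model directories.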