From 5bb12853f31e7a1cb524be74cbe03c26f7335f66 Mon Sep 17 00:00:00 2001
From: Peihan <lphs1234567@gmail.com>
Date: Tue, 7 Sep 2021 13:34:03 +0800
Subject: [PATCH] support test different infer_ut suite type (#35435)

* notest,test=inference;support test different suite type

* notest,test=inference;fix script bugs

* notest,test=inference;fix count time issue

* test=document_fix; fix readme grammar
---
 .../fluid/inference/tests/infer_ut/README.md  |  37 ++++
 paddle/fluid/inference/tests/infer_ut/run.sh  | 195 ++++++++++--------
 .../inference/tests/infer_ut/test_LeViT.cc    |   8 +-
 .../tests/infer_ut/test_det_mv3_db.cc         |   6 +-
 .../tests/infer_ut/test_ernie_text_cls.cc     |   4 +-
 .../tests/infer_ut/test_ppyolo_mbv3.cc        |   4 +-
 .../tests/infer_ut/test_ppyolov2_r50vd.cc     |   4 +-
 .../inference/tests/infer_ut/test_resnet50.cc |  43 +++-
 .../tests/infer_ut/test_resnet50_quant.cc     |   4 +-
 .../inference/tests/infer_ut/test_suite.h     |  93 +++++++++
 10 files changed, 292 insertions(+), 106 deletions(-)
 create mode 100644 paddle/fluid/inference/tests/infer_ut/README.md

diff --git a/paddle/fluid/inference/tests/infer_ut/README.md b/paddle/fluid/inference/tests/infer_ut/README.md
new file mode 100644
index 0000000000..886c9f1eb1
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/README.md
@@ -0,0 +1,37 @@
+# Inference Model UT
+
+There are several model tests currently:
+- test_ernie_text_cls.cc
+- test_LeViT.cc
+- test_ppyolo_mbv3.cc
+- test_ppyolov2_r50vd.cc
+- test_resnet50.cc
+- test_resnet50_quant.cc
+- test_yolov3.cc
+
+To build and execute the tests on Linux, simply run:
+```
+./run.sh $PADDLE_ROOT $TURN_ON_MKL $TEST_GPU_CPU $DATA_DIR
+```
+To build on Windows, run the command with busybox:
+```
+busybox bash ./run.sh $PADDLE_ROOT $TURN_ON_MKL $TEST_GPU_CPU $DATA_DIR
+```
+
+- After the command is run, it automatically builds and executes the tests and downloads the required data to ${DATA_DIR}.
+- `$PADDLE_ROOT`: paddle library path
+- `$TURN_ON_MKL`: `ON` to use MKL, otherwise OpenBLAS
+- `$TEST_GPU_CPU`: `ON` to test both GPU and CPU modes, otherwise only CPU mode
+- `$DATA_DIR`: download data path
+
+Currently only 4 kinds of tests are supported, selected by the `--gtest_filter` argument; test suite names must match one of the following patterns:
+- `TEST(gpu_tester_*, test_name)`
+- `TEST(cpu_tester_*, test_name)`
+- `TEST(mkldnn_tester_*, test_name)`
+- `TEST(tensorrt_tester_*, test_name)`
+
+Test suites with the following names are skipped:
+- `TEST(DISABLED_gpu_tester_*, test_name)`
+- `TEST(DISABLED_cpu_tester_*, test_name)`
+- `TEST(DISABLED_mkldnn_tester_*, test_name)`
+- `TEST(DISABLED_tensorrt_tester_*, test_name)`
diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index ec744b358d..1547071e75 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -24,6 +24,7 @@ MSVC_STATIC_CRT=$6
 inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
 EXIT_CODE=0 # init default exit code
 WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
+test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter
 
 export RED='\033[0;31m' # red color
 export NC='\033[0m' # no color
@@ -33,23 +34,30 @@ cd `dirname $0`
 current_dir=`pwd`
 build_dir=${current_dir}/build
 log_dir=${current_dir}/log
+
+# check mkldnn installation
 if [ $2 == ON ]; then
   # You can export yourself if move the install path
   MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
+  test_suite_list="${test_suite_list}:mkldnn_tester*"
 fi
+
 if [ $3 == ON ]; then
   use_gpu_list='true false'
+  test_suite_list="${test_suite_list}:gpu_tester*"
 else
   use_gpu_list='false'
 fi
 
+# check tensorrt installation
+TENSORRT_COMPILED=$(cat "${inference_install_dir}/version.txt" | grep "WITH_TENSORRT")
 USE_TENSORRT=OFF
-if [ -d "$TENSORRT_ROOT_DIR" ]; then
+if [ -d "$TENSORRT_ROOT_DIR" ] && [ ! -z "$TENSORRT_COMPILED" ]  ; then
   USE_TENSORRT=ON
+  test_suite_list="${test_suite_list}:tensorrt_tester*"
 fi
 
-
 function download() {
   url_prefix=$1
   model_name=$2
@@ -146,104 +154,119 @@ mkdir -p ${log_dir}
 cd ${build_dir}
 rm -rf *
 
-# ---------tensorrt gpu tests on linux---------
-if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
-    rm -rf *
+if [ "$WIN_DETECT" != "" ]; then
+    exe_dir=${build_dir}/Release
+else
+    exe_dir=${build_dir}
+fi;
 
-    if [ $WIN_DETECT != "" ]; then
-        exe_dir=${build_dir}/Release
-    else
-        exe_dir=${build_dir}
-    fi;
+printf "${YELLOW} start test_resnet50 ${NC} \n";
+compile_test "test_resnet50"
+${exe_dir}/test_resnet50 \
+    --modeldir=$DATA_DIR/resnet50/resnet50 \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_resnet50.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_resnet50 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_resnet50 ${NC} \n";
-    compile_test "test_resnet50"
-    ${exe_dir}/test_resnet50 \
-        --modeldir=$DATA_DIR/resnet50/resnet50 \
-        --gtest_output=xml:${log_dir}/test_resnet50.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_resnet50 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_det_mv3_db ${NC} \n";
+compile_test "test_det_mv3_db"
+${exe_dir}/test_det_mv3_db \
+    --modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_det_mv3_db.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_det_mv3_db runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_det_mv3_db ${NC} \n";
-    compile_test "test_det_mv3_db"
-    ${exe_dir}/test_det_mv3_db \
-        --modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
-        --gtest_output=xml:${log_dir}/test_det_mv3_db.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_det_mv3_db runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_LeViT ${NC} \n";
+compile_test "test_LeViT"
+${exe_dir}/test_LeViT \
+    --modeldir=$DATA_DIR/LeViT/LeViT \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_LeViT.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_LeViT runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_LeViT ${NC} \n";
-    compile_test "test_LeViT"
-    ${exe_dir}/test_LeViT \
-        --modeldir=$DATA_DIR/LeViT/LeViT \
-        --gtest_output=xml:${log_dir}/test_LeViT.xml
+if [ "$WIN_DETECT" != "" ]; then
+    #TODO(OliverLPH): enable test_ernie_text_cls on windows after fix compile issue
+    echo "  skip test_ernie_text_cls  "
+else
+    printf "${YELLOW} start test_ernie_text_cls ${NC} \n";
+    compile_test "test_ernie_text_cls"
+    ${exe_dir}/test_ernie_text_cls \
+        --modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
+        --gtest_filter=${test_suite_list} \
+        --gtest_output=xml:${log_dir}/test_ernie_text_cls.xml
     if [ $? -ne 0 ]; then
-        echo "${RED} test_LeViT runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+        echo "${RED} test_ernie_text_cls runs failed ${NC}" >> ${exe_dir}/test_summary.txt
         EXIT_CODE=8
     fi
+fi;
 
-    if [ $WIN_DETECT != "" ]; then
-        echo "  skip test_ernie_text_cls  "
-    else
-        printf "${YELLOW} start test_ernie_text_cls ${NC} \n";
-        compile_test "test_ernie_text_cls"
-        ${exe_dir}/test_ernie_text_cls \
-            --modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
-            --gtest_output=xml:${log_dir}/test_ernie_text_cls.xml
-        if [ $? -ne 0 ]; then
-            echo "${RED} test_ernie_text_cls runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-            EXIT_CODE=8
-        fi
-    fi;
-
-    printf "${YELLOW} start test_yolov3 ${NC} \n";
-    compile_test "test_yolov3"
-    ${exe_dir}/test_yolov3 \
-        --modeldir=$DATA_DIR/yolov3/yolov3 \
-        --gtest_output=xml:${log_dir}/test_yolov3.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_yolov3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_yolov3 ${NC} \n";
+compile_test "test_yolov3"
+${exe_dir}/test_yolov3 \
+    --modeldir=$DATA_DIR/yolov3/yolov3 \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_yolov3.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_yolov3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_ppyolo_mbv3 ${NC} \n";
-    compile_test "test_ppyolo_mbv3"
-    ${exe_dir}/test_ppyolo_mbv3 \
-        --modeldir=$DATA_DIR/ppyolo_mbv3/ppyolo_mbv3 \
-        --gtest_output=xml:${log_dir}/test_ppyolo_mbv3.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_ppyolo_mbv3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_ppyolo_mbv3 ${NC} \n";
+compile_test "test_ppyolo_mbv3"
+${exe_dir}/test_ppyolo_mbv3 \
+    --modeldir=$DATA_DIR/ppyolo_mbv3/ppyolo_mbv3 \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_ppyolo_mbv3.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_ppyolo_mbv3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_ppyolov2_r50vd ${NC} \n";
-    compile_test "test_ppyolov2_r50vd"
-    ${exe_dir}/test_ppyolov2_r50vd \
-        --modeldir=$DATA_DIR/ppyolov2_r50vd/ppyolov2_r50vd \
-        --gtest_output=xml:${log_dir}/test_ppyolov2_r50vd.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_ppyolov2_r50vd runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_ppyolov2_r50vd ${NC} \n";
+compile_test "test_ppyolov2_r50vd"
+${exe_dir}/test_ppyolov2_r50vd \
+    --modeldir=$DATA_DIR/ppyolov2_r50vd/ppyolov2_r50vd \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_ppyolov2_r50vd.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_ppyolov2_r50vd runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
+fi
 
-    printf "${YELLOW} start test_resnet50_quant ${NC} \n";
-    compile_test "test_resnet50_quant"
-    ${exe_dir}/test_resnet50_quant \
-        --int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
-        --modeldir=$DATA_DIR/resnet50/resnet50 \
-        --datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
-        --gtest_output=xml:${log_dir}/test_resnet50_quant.xml
-    if [ $? -ne 0 ]; then
-        echo "${RED} test_resnet50_quant runs failed ${NC}" >> ${exe_dir}/test_summary.txt
-        EXIT_CODE=8
-    fi
+printf "${YELLOW} start test_resnet50_quant ${NC} \n";
+compile_test "test_resnet50_quant"
+${exe_dir}/test_resnet50_quant \
+    --int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
+    --modeldir=$DATA_DIR/resnet50/resnet50 \
+    --datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
+    --gtest_filter=${test_suite_list} \
+    --gtest_output=xml:${log_dir}/test_resnet50_quant.xml
+if [ $? -ne 0 ]; then
+    echo "${RED} test_resnet50_quant runs failed ${NC}" >> ${exe_dir}/test_summary.txt
+    EXIT_CODE=8
 fi
 
 set +x
+
+test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
+echo " "
+echo "CI Tested Following Patterns: "
+echo "=====================test patterns======================"
+for test_suite in ${test_suites}; do
+  echo "  ${test_suite}"
+done
+echo "========================================================"
+echo " "
+
 if [[ -f ${exe_dir}/test_summary.txt ]];then
   echo " "
   echo "Summary Failed Tests ..."
diff --git a/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
index f115d1f898..a7ff5af1bd 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
@@ -32,7 +32,7 @@ paddle::test::Record PrepareInput(int batch_size) {
   return image_Record;
 }
 
-TEST(test_LeViT, analysis_gpu_bz1) {
+TEST(gpu_tester_LeViT, analysis_gpu_bz1) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["x"] = PrepareInput(1);
@@ -60,7 +60,7 @@ TEST(test_LeViT, analysis_gpu_bz1) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_LeViT, trt_fp32_bz2) {
+TEST(tensorrt_tester_LeViT, trt_fp32_bz2) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["x"] = PrepareInput(2);
@@ -91,7 +91,7 @@ TEST(test_LeViT, trt_fp32_bz2) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_LeViT, serial_diff_batch_trt_fp32) {
+TEST(tensorrt_tester_LeViT, serial_diff_batch_trt_fp32) {
   int max_batch_size = 5;
   // prepare groudtruth config
   paddle_infer::Config config, config_no_ir;
@@ -127,7 +127,7 @@ TEST(test_LeViT, serial_diff_batch_trt_fp32) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_LeViT, multi_thread4_trt_fp32_bz2) {
+TEST(tensorrt_tester_LeViT, multi_thread4_trt_fp32_bz2) {
   int thread_num = 4;
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
diff --git a/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
index ce7b8ce463..67c2eeb0be 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
@@ -77,7 +77,7 @@ void PrepareDynamicShape(paddle_infer::Config* config, int max_batch_size = 4) {
                                  opt_input_shape);
 }
 
-TEST(test_det_mv3_db, analysis_gpu_bz4) {
+TEST(gpu_tester_det_mv3_db, analysis_gpu_bz4) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["x"] = PrepareInput(4, 640);
@@ -105,7 +105,7 @@ TEST(test_det_mv3_db, analysis_gpu_bz4) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
+TEST(tensorrt_tester_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
   int thread_num = 2;  // thread > 2 may OOM
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
@@ -149,7 +149,7 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
-TEST(test_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
+TEST(mkldnn_tester_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
   int thread_num = 2;  // thread > 2 may OOM
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc b/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
index f73803fe59..6ef894cc3d 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
@@ -52,7 +52,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
   return my_input_data_map;
 }
 
-TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
+TEST(gpu_tester_ernie_text_cls, analysis_gpu_bz2_buffer) {
   // init input data
   auto my_input_data_map = PrepareInput(2);
   // init output data
@@ -84,7 +84,7 @@ TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
+TEST(mkldnn_tester_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
   int thread_num = 4;
   // init input data
   auto my_input_data_map = PrepareInput(2);
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
index d845e5da15..0a24975d62 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
@@ -55,7 +55,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
   return input_data_map;
 }
 
-TEST(test_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
+TEST(tensorrt_tester_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
   int thread_num = 4;
   // init input data
   auto input_data_map = PrepareInput(2);
@@ -101,7 +101,7 @@ TEST(test_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
-TEST(test_ppyolo_mbv3, multi_thread4_mkl_bz2) {
+TEST(mkldnn_tester_ppyolo_mbv3, multi_thread4_mkl_bz2) {
   // TODO(OliverLPH): mkldnn multi thread will fail
   int thread_num = 4;
   // init input data
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc b/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
index b2cb4ca322..d74a333232 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_ppyolov2_r50vd.cc
@@ -55,7 +55,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
   return input_data_map;
 }
 
-TEST(test_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
+TEST(tensorrt_tester_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
   int thread_num = 2;  // thread > 2 may OOM
   // init input data
   auto input_data_map = PrepareInput(1);
@@ -100,7 +100,7 @@ TEST(test_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
-TEST(test_ppyolov2_r50vd, multi_thread2_mkl_bz2) {
+TEST(mkldnn_tester_ppyolov2_r50vd, multi_thread2_mkl_bz2) {
   int thread_num = 2;
   // init input data
   auto input_data_map = PrepareInput(2);
diff --git a/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc b/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
index 035bc3f34f..6157fdbdb1 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_resnet50.cc
@@ -32,7 +32,7 @@ paddle::test::Record PrepareInput(int batch_size) {
   return image_Record;
 }
 
-TEST(test_resnet50, analysis_gpu_bz1) {
+TEST(gpu_tester_resnet50, analysis_gpu_bz1) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["inputs"] = PrepareInput(1);
@@ -60,7 +60,7 @@ TEST(test_resnet50, analysis_gpu_bz1) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_resnet50, trt_fp32_bz2) {
+TEST(tensorrt_tester_resnet50, trt_fp32_bz2) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["inputs"] = PrepareInput(2);
@@ -91,7 +91,7 @@ TEST(test_resnet50, trt_fp32_bz2) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_resnet50, serial_diff_batch_trt_fp32) {
+TEST(tensorrt_tester_resnet50, serial_diff_batch_trt_fp32) {
   int max_batch_size = 5;
   // prepare groudtruth config
   paddle_infer::Config config, config_no_ir;
@@ -127,7 +127,7 @@ TEST(test_resnet50, serial_diff_batch_trt_fp32) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(test_resnet50, multi_thread4_trt_fp32_bz2) {
+TEST(tensorrt_tester_resnet50, multi_thread4_trt_fp32_bz2) {
   int thread_num = 4;
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
@@ -170,7 +170,7 @@ TEST(test_resnet50, multi_thread4_trt_fp32_bz2) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
-TEST(test_resnet50, trt_int8_bz2) {
+TEST(tensorrt_tester_resnet50, trt_int8_bz2) {
   // init input data
   std::map<std::string, paddle::test::Record> my_input_data_map;
   my_input_data_map["inputs"] = PrepareInput(2);
@@ -199,6 +199,39 @@ TEST(test_resnet50, trt_int8_bz2) {
   std::cout << "finish test" << std::endl;
 }
 
+// Profiling case (DISABLED_, so skipped by default): runs SingleThreadProfile
+// on every predictor of the pool concurrently and prints the summed times.
+TEST(DISABLED_tensorrt_tester_resnet50, profile_multi_thread_trt_fp32) {
+  int batch_size = 2;
+  int thread_num = 4;
+  int repeat_time = 1000;
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["inputs"] = PrepareInput(batch_size);
+  // prepare inference config
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+  // launch one profiling task per predictor; std::launch::async forces a
+  // real thread per task instead of permitting deferred (serial) execution
+  services::PredictorPool pred_pool(config, thread_num);
+  std::vector<std::future<double>> calcs;
+  for (int i = 0; i < thread_num; ++i) {
+    calcs.push_back(std::async(
+        std::launch::async, &paddle::test::SingleThreadProfile,
+        pred_pool.Retrive(i), &my_input_data_map, repeat_time));
+  }
+  double total_time_ = 0.0;
+  for (auto&& fut : calcs) {
+    total_time_ += fut.get();
+  }
+  std::cout << total_time_ << std::endl;
+  std::cout << "finish multi-thread profile" << std::endl;
+}
+
 }  // namespace paddle_infer
 
 int main(int argc, char** argv) {
diff --git a/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
index bc33c817b3..ed7ab7b5ee 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
@@ -52,7 +52,7 @@ paddle::test::Record PrepareInput(int batch_size) {
   return image_Record;
 }
 
-TEST(DISABLED_test_resnet50_quant, multi_thread4_trt_int8_bz1) {
+TEST(DISABLED_tensorrt_tester_resnet50_quant, multi_thread4_trt_int8_bz1) {
   int thread_num = 4;
   // init input data
   std::map<std::string, paddle::test::Record> input_data_map;
@@ -94,7 +94,7 @@ TEST(DISABLED_test_resnet50_quant, multi_thread4_trt_int8_bz1) {
   std::cout << "finish test" << std::endl;
 }
 
-TEST(DISABLED_test_resnet50_quant, multi_thread_multi_instance) {
+TEST(DISABLED_tensorrt_tester_resnet50_quant, multi_thread_multi_instance) {
   int thread_num = 4;
   // init input data
   std::map<std::string, paddle::test::Record> input_data_fp32, input_data_quant;
diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h
index b2546b180b..0b580cd7c7 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_suite.h
+++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -15,6 +15,7 @@
 #include <math.h>
 #include <algorithm>
 #include <fstream>
+#include <future>
 #include <iostream>
 #include <numeric>
 #include <string>
@@ -148,5 +149,97 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
   }
 }
 
+// Accumulating timer: every start()/stop() pair adds its elapsed time to a
+// running total; report() returns that total in milliseconds.
+class Timer {
+ public:
+  Timer() { reset(); }
+  void start() { start_t = Clock::now(); }
+  void stop() {  // measured at microsecond resolution
+    using std::chrono::microseconds;
+    const auto elapsed = Clock::now() - start_t;
+    total_time += std::chrono::duration_cast<microseconds>(elapsed).count();
+  }
+  void reset() { total_time = 0.; }
+  double report() const { return total_time / 1000.0; }  // us -> ms
+
+ private:
+  using Clock = std::chrono::steady_clock;  // monotonic: safe for intervals
+  double total_time;                        // accumulated microseconds
+  Clock::time_point start_t;
+};
+
+// Single-thread inference benchmark: feeds every record in `input_data_map`
+// to `predictor` once, then runs inference `repeat_times` times, copying each
+// output via CopyToCpu. Returns the time spent in that loop, in ms.
+double SingleThreadProfile(paddle_infer::Predictor *predictor,
+                           std::map<std::string, Record> *input_data_map,
+                           int repeat_times = 2) {
+  // prepare input tensors, converting each record to its declared dtype
+  for (const auto & [ key, value ] : *input_data_map) {
+    switch (value.type) {
+      case paddle::PaddleDType::INT64: {
+        std::vector<int64_t> input_value(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::INT32: {
+        std::vector<int32_t> input_value(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::FLOAT32: {
+        std::vector<float> input_value(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+    }
+  }
+
+  Timer timer;  // init prediction timer
+  timer.start();
+  // inference; the `int` index avoids a signed/unsigned comparison, so a
+  // non-positive repeat_times simply runs zero iterations
+  for (int i = 0; i < repeat_times; ++i) {
+    CHECK(predictor->Run());
+    auto output_names = predictor->GetOutputNames();
+    for (auto &output_name : output_names) {
+      auto output_tensor = predictor->GetOutputHandle(output_name);
+      std::vector<int> output_shape = output_tensor->shape();
+      int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                    std::multiplies<int>());
+      // copy the result out so the CopyToCpu call is part of the timed loop
+      switch (output_tensor->type()) {
+        case paddle::PaddleDType::INT64: {
+          std::vector<int64_t> out_data;
+          out_data.resize(out_num);
+          output_tensor->CopyToCpu(out_data.data());
+          break;
+        }
+        case paddle::PaddleDType::FLOAT32: {
+          std::vector<float> out_data;
+          out_data.resize(out_num);
+          output_tensor->CopyToCpu(out_data.data());
+          break;
+        }
+        case paddle::PaddleDType::INT32: {
+          std::vector<int32_t> out_data;
+          out_data.resize(out_num);
+          output_tensor->CopyToCpu(out_data.data());
+          break;
+        }
+      }
+    }
+  }
+  timer.stop();
+  return timer.report();
+}
+
 }  // namespace test
 }  // namespace paddle
-- 
GitLab