未验证 提交 5bb12853 编写于 作者: P Peihan 提交者: GitHub

support test different infer_ut suite type (#35435)

* notest,test=inference;support test different suite type

* notest,test=inference;fix script bugs

* notest,test=inference;fix count time issue

* test=document_fix; fix readme grammar
上级 3c8eeb5d
# Inference Model UT
There are several model tests currently:
- test_ernie_text_cls.cc
- test_LeViT.cc
- test_ppyolo_mbv3.cc
- test_ppyolov2_r50vd.cc
- test_resnet50.cc
- test_resnet50_quant.cc
- test_yolov3.cc
To build and execute tests on Linux, simply run
```
./run.sh $PADDLE_ROOT $TURN_ON_MKL $TEST_GPU_CPU $DATA_DIR
```
To build on windows, run command with busybox
```
busybox bash ./run.sh $PADDLE_ROOT $TURN_ON_MKL $TEST_GPU_CPU $DATA_DIR
```
- After running the command, it will build and execute the tests, and download the required test data to ${DATA_DIR} automatically.
- `$PADDLE_ROOT`: paddle library path
- `$TURN_ON_MKL`: use MKL or Openblas
- `$TEST_GPU_CPU`: test both GPU/CPU mode or only CPU mode
- `$DATA_DIR`: download data path
Currently only 4 kinds of tests are supported, controlled by the `--gtest_filter` argument; test suite names should match one of the following.
- `TEST(gpu_tester_*, test_name)`
- `TEST(cpu_tester_*, test_name)`
- `TEST(mkldnn_tester_*, test_name)`
- `TEST(tensorrt_tester_*, test_name)`
Skipped test suite names:
- `TEST(DISABLED_gpu_tester_*, test_name)`
- `TEST(DISABLED_cpu_tester_*, test_name)`
- `TEST(DISABLED_mkldnn_tester_*, test_name)`
- `TEST(DISABLED_tensorrt_tester_*, test_name)`
......@@ -24,6 +24,7 @@ MSVC_STATIC_CRT=$6
# Root of the pre-built Paddle inference library the tests link against.
inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
EXIT_CODE=0 # init default exit code
WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
# Suites enabled below (mkldnn/gpu/tensorrt) are appended ":"-separated;
# the final list is passed to every test binary via --gtest_filter.
test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter
export RED='\033[0;31m' # red color
export NC='\033[0m' # no color
......@@ -33,23 +34,30 @@ cd `dirname $0`
current_dir=`pwd`
build_dir=${current_dir}/build
log_dir=${current_dir}/log
# check mkldnn installation
# $2 (TURN_ON_MKL): when ON, make MKL shared libs loadable and enable mkldnn suites.
if [ $2 == ON ]; then
# You can export yourself if move the install path
MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
test_suite_list="${test_suite_list}:mkldnn_tester*"
fi
# $3 (TEST_GPU_CPU): when ON, run both GPU and CPU modes and enable gpu suites.
if [ $3 == ON ]; then
use_gpu_list='true false'
test_suite_list="${test_suite_list}:gpu_tester*"
else
use_gpu_list='false'
fi
# check tensorrt installation
# Non-empty only if the inference lib was compiled WITH_TENSORRT.
TENSORRT_COMPILED=$(cat "${inference_install_dir}/version.txt" | grep "WITH_TENSORRT")
USE_TENSORRT=OFF
# NOTE(review): the next two `if` lines look like pre-/post-change diff residue;
# only the second (which also checks TENSORRT_COMPILED) should remain -- confirm.
if [ -d "$TENSORRT_ROOT_DIR" ]; then
if [ -d "$TENSORRT_ROOT_DIR" ] && [ ! -z "$TENSORRT_COMPILED" ] ; then
USE_TENSORRT=ON
test_suite_list="${test_suite_list}:tensorrt_tester*"
fi
function download() {
url_prefix=$1
model_name=$2
......@@ -146,104 +154,119 @@ mkdir -p ${log_dir}
cd ${build_dir}
rm -rf *
# ---------tensorrt gpu tests on linux---------
if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
rm -rf *
if [ $WIN_DETECT != "" ]; then
exe_dir=${build_dir}/Release
else
exe_dir=${build_dir}
fi;
if [ $WIN_DETECT != "" ]; then
exe_dir=${build_dir}/Release
else
exe_dir=${build_dir}
fi;
printf "${YELLOW} start test_resnet50 ${NC} \n";
compile_test "test_resnet50"
${exe_dir}/test_resnet50 \
--modeldir=$DATA_DIR/resnet50/resnet50 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_resnet50.xml
if [ $? -ne 0 ]; then
echo "${RED} test_resnet50 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_resnet50 ${NC} \n";
compile_test "test_resnet50"
${exe_dir}/test_resnet50 \
--modeldir=$DATA_DIR/resnet50/resnet50 \
--gtest_output=xml:${log_dir}/test_resnet50.xml
if [ $? -ne 0 ]; then
echo "${RED} test_resnet50 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_det_mv3_db ${NC} \n";
compile_test "test_det_mv3_db"
${exe_dir}/test_det_mv3_db \
--modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_det_mv3_db.xml
if [ $? -ne 0 ]; then
echo "${RED} test_det_mv3_db runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_det_mv3_db ${NC} \n";
compile_test "test_det_mv3_db"
${exe_dir}/test_det_mv3_db \
--modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
--gtest_output=xml:${log_dir}/test_det_mv3_db.xml
if [ $? -ne 0 ]; then
echo "${RED} test_det_mv3_db runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_LeViT ${NC} \n";
compile_test "test_LeViT"
${exe_dir}/test_LeViT \
--modeldir=$DATA_DIR/LeViT/LeViT \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_LeViT.xml
if [ $? -ne 0 ]; then
echo "${RED} test_LeViT runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_LeViT ${NC} \n";
compile_test "test_LeViT"
${exe_dir}/test_LeViT \
--modeldir=$DATA_DIR/LeViT/LeViT \
--gtest_output=xml:${log_dir}/test_LeViT.xml
if [ $WIN_DETECT != "" ]; then
#TODO(OliverLPH): enable test_ernie_text_cls on windows after fix compile issue
echo " skip test_ernie_text_cls "
else
printf "${YELLOW} start test_ernie_text_cls ${NC} \n";
compile_test "test_ernie_text_cls"
${exe_dir}/test_ernie_text_cls \
--modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ernie_text_cls.xml
if [ $? -ne 0 ]; then
echo "${RED} test_LeViT runs failed ${NC}" >> ${exe_dir}/test_summary.txt
echo "${RED} test_ernie_text_cls runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
fi;
if [ $WIN_DETECT != "" ]; then
echo " skip test_ernie_text_cls "
else
printf "${YELLOW} start test_ernie_text_cls ${NC} \n";
compile_test "test_ernie_text_cls"
${exe_dir}/test_ernie_text_cls \
--modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
--gtest_output=xml:${log_dir}/test_ernie_text_cls.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ernie_text_cls runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
fi;
printf "${YELLOW} start test_yolov3 ${NC} \n";
compile_test "test_yolov3"
${exe_dir}/test_yolov3 \
--modeldir=$DATA_DIR/yolov3/yolov3 \
--gtest_output=xml:${log_dir}/test_yolov3.xml
if [ $? -ne 0 ]; then
echo "${RED} test_yolov3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_yolov3 ${NC} \n";
compile_test "test_yolov3"
${exe_dir}/test_yolov3 \
--modeldir=$DATA_DIR/yolov3/yolov3 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_yolov3.xml
if [ $? -ne 0 ]; then
echo "${RED} test_yolov3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_ppyolo_mbv3 ${NC} \n";
compile_test "test_ppyolo_mbv3"
${exe_dir}/test_ppyolo_mbv3 \
--modeldir=$DATA_DIR/ppyolo_mbv3/ppyolo_mbv3 \
--gtest_output=xml:${log_dir}/test_ppyolo_mbv3.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ppyolo_mbv3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_ppyolo_mbv3 ${NC} \n";
compile_test "test_ppyolo_mbv3"
${exe_dir}/test_ppyolo_mbv3 \
--modeldir=$DATA_DIR/ppyolo_mbv3/ppyolo_mbv3 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ppyolo_mbv3.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ppyolo_mbv3 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_ppyolov2_r50vd ${NC} \n";
compile_test "test_ppyolov2_r50vd"
${exe_dir}/test_ppyolov2_r50vd \
--modeldir=$DATA_DIR/ppyolov2_r50vd/ppyolov2_r50vd \
--gtest_output=xml:${log_dir}/test_ppyolov2_r50vd.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ppyolov2_r50vd runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_ppyolov2_r50vd ${NC} \n";
compile_test "test_ppyolov2_r50vd"
${exe_dir}/test_ppyolov2_r50vd \
--modeldir=$DATA_DIR/ppyolov2_r50vd/ppyolov2_r50vd \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ppyolov2_r50vd.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ppyolov2_r50vd runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_resnet50_quant ${NC} \n";
compile_test "test_resnet50_quant"
${exe_dir}/test_resnet50_quant \
--int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
--modeldir=$DATA_DIR/resnet50/resnet50 \
--datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
--gtest_output=xml:${log_dir}/test_resnet50_quant.xml
if [ $? -ne 0 ]; then
echo "${RED} test_resnet50_quant runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_resnet50_quant ${NC} \n";
compile_test "test_resnet50_quant"
${exe_dir}/test_resnet50_quant \
--int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
--modeldir=$DATA_DIR/resnet50/resnet50 \
--datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_resnet50_quant.xml
if [ $? -ne 0 ]; then
echo "${RED} test_resnet50_quant runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
set +x # stop echoing commands for the human-readable summary below
# Turn the ":"-separated --gtest_filter list into whitespace-separated
# patterns and print one per line.
test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
echo " "
echo "CI Tested Following Patterns: "
echo "=====================test patterns======================"
for test_suite in ${test_suites}; do
echo " ${test_suite}"
done
if [[ -f ${exe_dir}/test_summary.txt ]];then
echo " "
echo "Summary Failed Tests ..."
......
......@@ -32,7 +32,7 @@ paddle::test::Record PrepareInput(int batch_size) {
return image_Record;
}
TEST(test_LeViT, analysis_gpu_bz1) {
TEST(gpu_tester_LeViT, analysis_gpu_bz1) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(1);
......@@ -60,7 +60,7 @@ TEST(test_LeViT, analysis_gpu_bz1) {
std::cout << "finish test" << std::endl;
}
TEST(test_LeViT, trt_fp32_bz2) {
TEST(tensorrt_tester_LeViT, trt_fp32_bz2) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(2);
......@@ -91,7 +91,7 @@ TEST(test_LeViT, trt_fp32_bz2) {
std::cout << "finish test" << std::endl;
}
TEST(test_LeViT, serial_diff_batch_trt_fp32) {
TEST(tensorrt_tester_LeViT, serial_diff_batch_trt_fp32) {
int max_batch_size = 5;
// prepare groudtruth config
paddle_infer::Config config, config_no_ir;
......@@ -127,7 +127,7 @@ TEST(test_LeViT, serial_diff_batch_trt_fp32) {
std::cout << "finish test" << std::endl;
}
TEST(test_LeViT, multi_thread4_trt_fp32_bz2) {
TEST(tensorrt_tester_LeViT, multi_thread4_trt_fp32_bz2) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
......
......@@ -77,7 +77,7 @@ void PrepareDynamicShape(paddle_infer::Config* config, int max_batch_size = 4) {
opt_input_shape);
}
TEST(test_det_mv3_db, analysis_gpu_bz4) {
TEST(gpu_tester_det_mv3_db, analysis_gpu_bz4) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(4, 640);
......@@ -105,7 +105,7 @@ TEST(test_det_mv3_db, analysis_gpu_bz4) {
std::cout << "finish test" << std::endl;
}
TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
TEST(tensorrt_tester_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
int thread_num = 2; // thread > 2 may OOM
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
......@@ -149,7 +149,7 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
std::cout << "finish multi-thread test" << std::endl;
}
TEST(test_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
TEST(mkldnn_tester_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
int thread_num = 2; // thread > 2 may OOM
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
......
......@@ -52,7 +52,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
return my_input_data_map;
}
TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
TEST(gpu_tester_ernie_text_cls, analysis_gpu_bz2_buffer) {
// init input data
auto my_input_data_map = PrepareInput(2);
// init output data
......@@ -84,7 +84,7 @@ TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
std::cout << "finish test" << std::endl;
}
TEST(test_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
TEST(mkldnn_tester_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
int thread_num = 4;
// init input data
auto my_input_data_map = PrepareInput(2);
......
......@@ -55,7 +55,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
return input_data_map;
}
TEST(test_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
TEST(tensorrt_tester_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
int thread_num = 4;
// init input data
auto input_data_map = PrepareInput(2);
......@@ -101,7 +101,7 @@ TEST(test_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
std::cout << "finish multi-thread test" << std::endl;
}
TEST(test_ppyolo_mbv3, multi_thread4_mkl_bz2) {
TEST(mkldnn_tester_ppyolo_mbv3, multi_thread4_mkl_bz2) {
// TODO(OliverLPH): mkldnn multi thread will fail
int thread_num = 4;
// init input data
......
......@@ -55,7 +55,7 @@ std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
return input_data_map;
}
TEST(test_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
TEST(tensorrt_tester_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
int thread_num = 2; // thread > 2 may OOM
// init input data
auto input_data_map = PrepareInput(1);
......@@ -100,7 +100,7 @@ TEST(test_ppyolov2_r50vd, multi_thread2_trt_fp32_bz1) {
std::cout << "finish multi-thread test" << std::endl;
}
TEST(test_ppyolov2_r50vd, multi_thread2_mkl_bz2) {
TEST(mkldnn_tester_ppyolov2_r50vd, multi_thread2_mkl_bz2) {
int thread_num = 2;
// init input data
auto input_data_map = PrepareInput(2);
......
......@@ -32,7 +32,7 @@ paddle::test::Record PrepareInput(int batch_size) {
return image_Record;
}
TEST(test_resnet50, analysis_gpu_bz1) {
TEST(gpu_tester_resnet50, analysis_gpu_bz1) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["inputs"] = PrepareInput(1);
......@@ -60,7 +60,7 @@ TEST(test_resnet50, analysis_gpu_bz1) {
std::cout << "finish test" << std::endl;
}
TEST(test_resnet50, trt_fp32_bz2) {
TEST(tensorrt_tester_resnet50, trt_fp32_bz2) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["inputs"] = PrepareInput(2);
......@@ -91,7 +91,7 @@ TEST(test_resnet50, trt_fp32_bz2) {
std::cout << "finish test" << std::endl;
}
TEST(test_resnet50, serial_diff_batch_trt_fp32) {
TEST(tensorrt_tester_resnet50, serial_diff_batch_trt_fp32) {
int max_batch_size = 5;
// prepare groudtruth config
paddle_infer::Config config, config_no_ir;
......@@ -127,7 +127,7 @@ TEST(test_resnet50, serial_diff_batch_trt_fp32) {
std::cout << "finish test" << std::endl;
}
TEST(test_resnet50, multi_thread4_trt_fp32_bz2) {
TEST(tensorrt_tester_resnet50, multi_thread4_trt_fp32_bz2) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
......@@ -170,7 +170,7 @@ TEST(test_resnet50, multi_thread4_trt_fp32_bz2) {
std::cout << "finish multi-thread test" << std::endl;
}
TEST(test_resnet50, trt_int8_bz2) {
TEST(tensorrt_tester_resnet50, trt_int8_bz2) {
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["inputs"] = PrepareInput(2);
......@@ -199,6 +199,39 @@ TEST(test_resnet50, trt_int8_bz2) {
std::cout << "finish test" << std::endl;
}
// Profiling run (disabled by default): executes FP32 TensorRT inference on
// resnet50 from four threads and prints the summed per-thread wall time (ms).
TEST(DISABLED_tensorrt_tester_resnet50, profile_multi_thread_trt_fp32) {
  const int batch_size = 2;
  const int thread_num = 4;
  const int repeat_time = 1000;
  // input data shared (read-only) by all worker threads
  std::map<std::string, paddle::test::Record> my_input_data_map;
  my_input_data_map["inputs"] = PrepareInput(batch_size);
  // output placeholder (not consumed by the profiling helper)
  std::map<std::string, paddle::test::Record> infer_output_data;
  // inference config: GPU with a TensorRT FP32 engine
  paddle_infer::Config config;
  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
                  FLAGS_modeldir + "/inference.pdiparams");
  config.EnableUseGpu(100, 0);
  config.EnableTensorRtEngine(
      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
  // launch one async profiling task per pooled predictor
  services::PredictorPool pred_pool(config, thread_num);
  std::vector<std::future<double>> calcs;
  for (int i = 0; i < thread_num; ++i) {
    calcs.emplace_back(std::async([&, i] {
      return paddle::test::SingleThreadProfile(pred_pool.Retrive(i),
                                               &my_input_data_map, repeat_time);
    }));
  }
  // reduce: sum the elapsed time reported by every thread
  double total_time_ = 0.0;
  for (auto &fut : calcs) {
    total_time_ += fut.get();
  }
  std::cout << total_time_ << std::endl;
  std::cout << "finish multi-thread profile" << std::endl;
}
} // namespace paddle_infer
int main(int argc, char** argv) {
......
......@@ -52,7 +52,7 @@ paddle::test::Record PrepareInput(int batch_size) {
return image_Record;
}
TEST(DISABLED_test_resnet50_quant, multi_thread4_trt_int8_bz1) {
TEST(DISABLED_tensorrt_tester_resnet50_quant, multi_thread4_trt_int8_bz1) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> input_data_map;
......@@ -94,7 +94,7 @@ TEST(DISABLED_test_resnet50_quant, multi_thread4_trt_int8_bz1) {
std::cout << "finish test" << std::endl;
}
TEST(DISABLED_test_resnet50_quant, multi_thread_multi_instance) {
TEST(DISABLED_tensorrt_tester_resnet50_quant, multi_thread_multi_instance) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> input_data_fp32, input_data_quant;
......
......@@ -15,6 +15,7 @@
#include <math.h>
#include <algorithm>
#include <fstream>
#include <future>
#include <iostream>
#include <numeric>
#include <string>
......@@ -148,5 +149,97 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
}
}
// Wall-clock timer; accumulates elapsed time across start()/stop() pairs
// and reports the total in milliseconds.
class Timer {
 public:
  Timer() { reset(); }
  // Record the starting timestamp of a measured interval.
  void start() { start_t = std::chrono::high_resolution_clock::now(); }
  // Close the current interval and add its duration to the running total.
  void stop() {
    auto end_t = std::chrono::high_resolution_clock::now();
    // Accumulate in microseconds for precision; report() converts to ms.
    // (Fix: the alias was previously named `ms` although it denoted
    // std::chrono::microseconds, which was actively misleading.)
    using us = std::chrono::microseconds;
    total_time += std::chrono::duration_cast<us>(end_t - start_t).count();
  }
  // Discard all accumulated time.
  void reset() { total_time = 0.; }
  // Total measured time, converted from microseconds to milliseconds.
  double report() { return total_time / 1000.0; }

 private:
  double total_time;  // accumulated elapsed time in microseconds
  std::chrono::high_resolution_clock::time_point start_t;
};
// Single-thread inference benchmark: copies *input_data_map into
// `predictor`'s input tensors, runs inference `repeat_times` times
// (fetching every output tensor each iteration so the whole pipeline is
// exercised), and returns the total inference wall time in ms.
// Only INT64/INT32/FLOAT32 tensors are handled; other dtypes are
// silently skipped (matches the rest of this helper file).
double SingleThreadProfile(paddle_infer::Predictor *predictor,
                           std::map<std::string, Record> *input_data_map,
                           int repeat_times = 2) {
  // prepare input tensors (host -> predictor copy, once, outside the timer)
  auto input_names = predictor->GetInputNames();
  for (const auto &[key, value] : *input_data_map) {
    // Handle lookup and reshape are dtype-independent; hoisted out of the
    // switch to remove the triplicated code of the original version.
    auto input_tensor = predictor->GetInputHandle(key);
    input_tensor->Reshape(value.shape);
    switch (value.type) {
      case paddle::PaddleDType::INT64: {
        std::vector<int64_t> input_value(value.data.begin(), value.data.end());
        input_tensor->CopyFromCpu(input_value.data());
        break;
      }
      case paddle::PaddleDType::INT32: {
        std::vector<int32_t> input_value(value.data.begin(), value.data.end());
        input_tensor->CopyFromCpu(input_value.data());
        break;
      }
      case paddle::PaddleDType::FLOAT32: {
        std::vector<float> input_value(value.data.begin(), value.data.end());
        input_tensor->CopyFromCpu(input_value.data());
        break;
      }
    }
  }
  Timer timer;  // init prediction timer
  timer.start();
  // inference loop; fix: loop index was size_t compared against the signed
  // parameter `repeat_times` (signed/unsigned mismatch).
  for (int i = 0; i < repeat_times; ++i) {
    CHECK(predictor->Run());
    auto output_names = predictor->GetOutputNames();
    for (auto &output_name : output_names) {
      auto output_tensor = predictor->GetOutputHandle(output_name);
      std::vector<int> output_shape = output_tensor->shape();
      // element count = product of dims
      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                    1, std::multiplies<int>());
      // pull every output back to host so device work is not skipped
      switch (output_tensor->type()) {
        case paddle::PaddleDType::INT64: {
          std::vector<int64_t> out_data(out_num);
          output_tensor->CopyToCpu(out_data.data());
          break;
        }
        case paddle::PaddleDType::FLOAT32: {
          std::vector<float> out_data(out_num);
          output_tensor->CopyToCpu(out_data.data());
          break;
        }
        case paddle::PaddleDType::INT32: {
          std::vector<int32_t> out_data(out_num);
          output_tensor->CopyToCpu(out_data.data());
          break;
        }
      }
    }
  }
  timer.stop();
  return timer.report();
}
} // namespace test
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册