diff --git a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
index f546ef2b45e0a7d11c06afefe559c1c495b0b48d..16d1f211a860f94dda0c507feb40678dff5446e8 100644
--- a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
@@ -66,7 +66,7 @@ else()
   if(WITH_MKL)
     set(FLAG_OPENMP "-fopenmp")
   endif()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 ${FLAG_OPENMP}")
 endif()
 
 if(WITH_GPU)
diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index ba38a4489035fb8179e695f245015fce61f83a5a..64ada23767f1fad487ad1b375cbe33b9940d103f 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -65,6 +65,18 @@ for model_name in $download_list; do
   download $url_prefix $model_name
 done
 
+ocr_download_list='ocr_det_mv3_db'
+for model_name in $ocr_download_list; do
+  url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/ocr"
+  download $url_prefix $model_name
+done
+
+clas_download_list='LeViT'
+for model_name in $clas_download_list; do
+  url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/class"
+  download $url_prefix $model_name
+done
+
 # compile and run test
 cd $current_dir
 mkdir -p build
@@ -92,6 +104,46 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
     fi
 fi
 
+# ---------tensorrt det_mv3_db on linux---------
+if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
+             -DWITH_MKL=$TURN_ON_MKL \
+             -DDEMO_NAME=test_det_mv3_db \
+             -DWITH_GPU=$TEST_GPU_CPU \
+             -DWITH_STATIC_LIB=OFF \
+             -DUSE_TENSORRT=$USE_TENSORRT \
+             -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
+             -DWITH_GTEST=ON
+    make -j$(nproc)
+    ./test_det_mv3_db \
+        --modeldir=$DATA_DIR/ocr_det_mv3_db/ocr_det_mv3_db \
+        --gtest_output=xml:test_det_mv3_db.xml
+    if [ $? -ne 0 ]; then
+        echo "test_det_mv3_db failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+fi
+
+# ---------tensorrt LeViT on linux---------
+if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
+             -DWITH_MKL=$TURN_ON_MKL \
+             -DDEMO_NAME=test_LeViT \
+             -DWITH_GPU=$TEST_GPU_CPU \
+             -DWITH_STATIC_LIB=OFF \
+             -DUSE_TENSORRT=$USE_TENSORRT \
+             -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
+             -DWITH_GTEST=ON
+    make -j$(nproc)
+    ./test_LeViT \
+        --modeldir=$DATA_DIR/LeViT/LeViT \
+        --gtest_output=xml:test_LeViT.xml
+    if [ $? -ne 0 ]; then
+        echo "test_LeViT failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+fi
+
 if [[ -f ${current_dir}/build/test_summary.txt ]];then
   echo "=====================test summary======================"
   cat ${current_dir}/build/test_summary.txt
diff --git a/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f115d1f898c94aded303e71740e109507cdb64f5
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+paddle::test::Record PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 224;
+  int height = 224;
+  paddle::test::Record image_Record;
+  int input_num = batch_size * channel * width * height;
+  std::vector<float> input_data(input_num, 1);
+  image_Record.data = input_data;
+  image_Record.shape = std::vector<int>{batch_size, channel, width, height};
+  image_Record.type = paddle::PaddleDType::FLOAT32;
+  return image_Record;
+}
+
+TEST(test_LeViT, analysis_gpu_bz1) {
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(1);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  // get ground truth by disabling ir optimization
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+  // get infer results
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data);
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data);
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_LeViT, trt_fp32_bz2) {
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  // workspace, max batch, min subgraph size, precision, use_static, use_calib
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 6, paddle_infer::PrecisionType::kFloat32, false, false);
+  // get ground truth by disabling ir optimization
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+  // get infer results
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data);
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data);
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_LeViT, serial_diff_batch_trt_fp32) {
+  int max_batch_size = 5;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  // workspace, max batch, min subgraph size, precision, use_static, use_calib
+  config.EnableTensorRtEngine(1 << 20, max_batch_size, 6,
+                              paddle_infer::PrecisionType::kFloat32, false,
+                              false);
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+
+  for (int i = 1; i < max_batch_size; i++) {
+    // init input data
+    std::map<std::string, paddle::test::Record> my_input_data_map;
+    my_input_data_map["x"] = PrepareInput(i);
+    // init output data
+    std::map<std::string, paddle::test::Record> infer_output_data,
+        truth_output_data;
+    // get ground truth by disabling ir optimization
+    SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                           &truth_output_data, 1);
+    // get infer results
+    SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                           &infer_output_data);
+    // check outputs
+    CompareRecord(&truth_output_data, &infer_output_data);
+  }
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_LeViT, multi_thread4_trt_fp32_bz2) {
+  int thread_num = 4;
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  // workspace, max batch, min subgraph size, precision, use_static, use_calib
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 6, paddle_infer::PrecisionType::kFloat32, false, false);
+  // get ground truth by disabling ir optimization
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multiple threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &my_input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c5920d3b2d8d557f86e0537df8aa5d2a060f0792
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
@@ -0,0 +1,159 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+paddle::test::Record PrepareInput(int batch_size, int image_shape = 640) {
+  // init input data
+  int channel = 3;
+  int width = image_shape;
+  int height = image_shape;
+  paddle::test::Record image_Record;
+  int input_num = batch_size * channel * width * height;
+  std::vector<float> input_data(input_num, 1);
+  image_Record.data = input_data;
+  image_Record.shape = std::vector<int>{batch_size, channel, width, height};
+  image_Record.type = paddle::PaddleDType::FLOAT32;
+  return image_Record;
+}
+
+void PrepareDynamicShape(paddle_infer::Config* config, int max_batch_size = 4) {
+  // set dynamic shape range
+  std::map<std::string, std::vector<int>> min_input_shape = {
+      {"x", {1, 3, 50, 50}},
+      {"conv2d_92.tmp_0", {1, 120, 20, 20}},
+      {"conv2d_91.tmp_0", {1, 24, 10, 10}},
+      {"conv2d_59.tmp_0", {1, 96, 20, 20}},
+      {"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
+      {"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
+      {"conv2d_124.tmp_0", {1, 256, 20, 20}},
+      {"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
+      {"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
+      {"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
+      {"elementwise_add_7", {1, 56, 2, 2}},
+      {"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
+  std::map<std::string, std::vector<int>> max_input_shape = {
+      {"x", {max_batch_size, 3, 2000, 2000}},
+      {"conv2d_92.tmp_0", {max_batch_size, 120, 400, 400}},
+      {"conv2d_91.tmp_0", {max_batch_size, 24, 200, 200}},
+      {"conv2d_59.tmp_0", {max_batch_size, 96, 400, 400}},
+      {"nearest_interp_v2_1.tmp_0", {max_batch_size, 256, 200, 200}},
+      {"nearest_interp_v2_2.tmp_0", {max_batch_size, 256, 400, 400}},
+      {"conv2d_124.tmp_0", {max_batch_size, 256, 400, 400}},
+      {"nearest_interp_v2_3.tmp_0", {max_batch_size, 64, 400, 400}},
+      {"nearest_interp_v2_4.tmp_0", {max_batch_size, 64, 400, 400}},
+      {"nearest_interp_v2_5.tmp_0", {max_batch_size, 64, 400, 400}},
+      {"elementwise_add_7", {max_batch_size, 56, 400, 400}},
+      {"nearest_interp_v2_0.tmp_0", {max_batch_size, 256, 400, 400}}};
+  std::map<std::string, std::vector<int>> opt_input_shape = {
+      {"x", {1, 3, 640, 640}},
+      {"conv2d_92.tmp_0", {1, 120, 160, 160}},
+      {"conv2d_91.tmp_0", {1, 24, 80, 80}},
+      {"conv2d_59.tmp_0", {1, 96, 160, 160}},
+      {"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
+      {"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
+      {"conv2d_124.tmp_0", {1, 256, 160, 160}},
+      {"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
+      {"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
+      {"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
+      {"elementwise_add_7", {1, 56, 40, 40}},
+      {"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
+  config->SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                 opt_input_shape);
+}
+
+TEST(test_det_mv3_db, analysis_gpu_bz4) {
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(4, 640);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  // get ground truth by disabling ir optimization
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+  // get infer results
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data);
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
+  int thread_num = 2;  // more than 2 threads may cause OOM
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2, 640);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  // workspace, max batch, min subgraph size, precision, use_static, use_calib
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+  PrepareDynamicShape(&config, 4);  // dynamic shape range, max_batch_size 4
+  // get ground truth by disabling ir optimization
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multiple threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &my_input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h
index 0e116b01847bfb9c89d52ab49c2b2a7334de9a93..b0da828998ca247a2990d69d09dbafeb8671ad95 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_suite.h
+++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -51,7 +51,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
 
   // inference
   for (size_t i = 0; i < repeat_times; ++i) {
-    predictor->Run();
+    CHECK(predictor->Run());
   }
 
   // get output data to Record