diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index c1694c76a7d2c8a7db99868ab0aae4ec12a3cfe2..627cd56f4830c9142c8d0b36ac9577e7d8514391 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -95,6 +95,12 @@ for model_name in $det_download_list; do
     download $url_prefix $model_name
 done
 
+unknown_download_list='resnet50_quant'
+for model_name in $unknown_download_list; do
+    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown"
+    download $url_prefix $model_name
+done
+
 function compile_test() {
     mkdir -p ${build_dir}
     cd ${build_dir}
@@ -194,6 +200,18 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
         EXIT_CODE=1
     fi
 
+    printf "${YELLOW} start test_resnet50_quant ${NC} \n";
+    compile_test "test_resnet50_quant"
+    ./test_resnet50_quant \
+        --int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
+        --modeldir=$DATA_DIR/resnet50/resnet50 \
+        --datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
+        --gtest_output=xml:test_resnet50_quant.xml
+    if [ $? -ne 0 ]; then
+        echo "test_resnet50_quant runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+
     cp ./*.xml ${log_dir};
 fi
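Note for reviewers: the `9.data` file wired in above is parsed by `PrepareInput` in the new test below, which implies a simple binary layout: one 3x224x224 float32 image followed by a single int32 ground-truth label. A minimal standalone sketch of a reader for that assumed layout (the `ImageSample` struct and function name are illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Hypothetical holder for one imagenet-eval-binary sample; not part of the patch.
struct ImageSample {
  std::vector<float> pixels;  // NCHW, 1 x 3 x 224 x 224
  int32_t label = -1;         // ground-truth class id appended after the pixels
};

// Reads one sample laid out as [3*224*224 float32][int32 label],
// mirroring the parsing done by PrepareInput below.
ImageSample ReadSample(const std::string& path) {
  constexpr int kInputNum = 3 * 224 * 224;
  ImageSample sample;
  sample.pixels.resize(kInputNum);
  std::ifstream fs(path, std::ifstream::binary);
  fs.read(reinterpret_cast<char*>(sample.pixels.data()),
          kInputNum * sizeof(float));
  fs.read(reinterpret_cast<char*>(&sample.label), sizeof(sample.label));
  return sample;
}
```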
diff --git a/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bf26f38c083fa22d96ffbe14a815469971211c43
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
@@ -0,0 +1,170 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+DEFINE_string(int8dir, "", "Directory of the quant inference model.");
+DEFINE_string(datadir, "", "Directory of the infer data.");
+
+namespace paddle_infer {
+
+paddle::test::Record PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 224;
+  int height = 224;
+  paddle::test::Record image_Record;
+  int input_num = batch_size * channel * width * height;
+
+  // load from binary data: input_num float32 pixels followed by an int label
+  std::ifstream fs(FLAGS_datadir, std::ifstream::binary);
+  EXPECT_TRUE(fs.is_open());
+  CHECK(fs.is_open());
+
+  float* input = new float[input_num];
+  memset(input, 0, input_num * sizeof(float));
+  auto input_data_tmp = input;
+  for (int i = 0; i < input_num; ++i) {
+    fs.read(reinterpret_cast<char*>(input_data_tmp), sizeof(*input_data_tmp));
+    input_data_tmp++;
+  }
+  int label = 0;
+  fs.read(reinterpret_cast<char*>(&label), sizeof(label));
+  fs.close();
+
+  std::vector<float> input_data{input, input + input_num};
+  delete[] input;  // free the staging buffer once copied into the Record
+  image_Record.data = input_data;
+  image_Record.shape = std::vector<int>{batch_size, channel, width, height};
+  image_Record.type = paddle::PaddleDType::FLOAT32;
+  image_Record.label = label;
+  return image_Record;
+}
+
+TEST(test_resnet50_quant, multi_thread4_trt_int8_bz1) {
+  int thread_num = 4;
+  // init input data
+  std::map<std::string, paddle::test::Record> input_data_map;
+  input_data_map["image"] = PrepareInput(1);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data;
+  // prepare inference config
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_int8dir);
+  config.EnableUseGpu(1000, 0);
+  config.EnableTensorRtEngine(1 << 20, 10, 3,
+                              paddle_infer::PrecisionType::kInt8, true, false);
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 5);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+
+    // check outputs: rank classes by score, top-1 must match the label
+    std::vector<int> index(1000);
+    std::iota(index.begin(), index.end(), 0);
+    auto out_data =
+        infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
+    std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
+      return out_data[i1] > out_data[i2];
+    });
+    // compare inference & groundtruth label
+    ASSERT_EQ(index[0], input_data_map["image"].label);
+  }
+
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_resnet50_quant, multi_thread_multi_instance) {
+  int thread_num = 4;
+  // init input data
+  std::map<std::string, paddle::test::Record> input_data_fp32, input_data_quant;
+  input_data_quant["image"] = PrepareInput(1);
+  input_data_fp32["inputs"] = PrepareInput(1);
+
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data;
+  // prepare inference config
+  paddle_infer::Config config_fp32, config_quant;
+  config_fp32.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                       FLAGS_modeldir + "/inference.pdiparams");
+  config_fp32.EnableUseGpu(1000, 0);
+  config_fp32.EnableTensorRtEngine(
+      1 << 20, 10, 3, paddle_infer::PrecisionType::kFloat32, true, false);
+
+  config_quant.SetModel(FLAGS_int8dir);
+  config_quant.EnableUseGpu(1000, 0);
+  config_quant.EnableTensorRtEngine(
+      1 << 20, 10, 3, paddle_infer::PrecisionType::kInt8, true, false);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool_fp32(config_fp32, thread_num);
+  services::PredictorPool pred_pool_quant(config_quant, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    // even threads run the FP32 model, odd threads run the INT8 quant model
+    if (i % 2 == 0) {
+      threads.emplace_back(paddle::test::SingleThreadPrediction,
+                           pred_pool_fp32.Retrive(i), &input_data_fp32,
+                           &infer_output_data, 5);
+    } else {
+      threads.emplace_back(paddle::test::SingleThreadPrediction,
+                           pred_pool_quant.Retrive(i), &input_data_quant,
+                           &infer_output_data, 5);
+    }
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    std::vector<int> index(1000);
+    threads[i].join();
+
+    // check outputs: rank classes by score, top-1 must match the label
+    std::iota(index.begin(), index.end(), 0);
+    auto out_data =
+        infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
+    std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
+      return out_data[i1] > out_data[i2];
+    });
+    // compare inference & groundtruth label
+    if (i % 2 == 0) {
+      ASSERT_EQ(index[0], input_data_fp32["inputs"].label);
+    } else {
+      ASSERT_EQ(index[0], input_data_quant["image"].label);
+    }
+  }
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
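Both tests recover the predicted class by filling an index vector with `std::iota` and sorting the whole thing by score, then comparing `index[0]` against the stored label. Since only the top-1 element is needed, an `std::max_element` arg-max is an equivalent, cheaper check; a sketch (the helper name is hypothetical, not part of the patch):

```cpp
#include <algorithm>
#include <iterator>

// Equivalent top-1 check: arg-max over the flat FP32 output scores,
// avoiding the O(n log n) full index sort used in the tests.
int Top1Class(const float* out_data, int num_classes = 1000) {
  return static_cast<int>(std::distance(
      out_data, std::max_element(out_data, out_data + num_classes)));
}

// Usage inside the join loop would then reduce to:
//   ASSERT_EQ(Top1Class(out_data), input_data_map["image"].label);
```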
diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h
index 2f1034d4df9a65bc506c492ffba594888ccd5c9b..b2546b180b976a16df6b81bd0f909d9c1e7463a7 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_suite.h
+++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -35,6 +35,7 @@ class Record {
   std::vector<float> data;
   std::vector<int> shape;
   paddle::PaddleDType type;
+  int label;  // ground-truth class id for accuracy checks
 };
 
 std::string read_file(std::string filename) {
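`SingleThreadPrediction` is defined in `test_suite.h` and is not touched by this diff; from the way the tests call it, each thread presumably copies its `Record` input into the predictor, runs inference, and copies the output tensor back. A rough sketch of that call pattern using only public `paddle_infer` APIs (the function and its parameters are assumptions, not the actual helper):

```cpp
#include <vector>

#include "paddle_inference_api.h"  // public Paddle Inference C++ API

// Sketch of one prediction round as the tests imply it; the real helper
// (paddle::test::SingleThreadPrediction) lives in test_suite.h.
void RunOnce(paddle_infer::Predictor* predictor,
             const std::vector<int>& shape,     // e.g. {1, 3, 224, 224}
             const std::vector<float>& data) {  // flat NCHW pixels
  auto in_names = predictor->GetInputNames();
  auto in_tensor = predictor->GetInputHandle(in_names[0]);
  in_tensor->Reshape(shape);
  in_tensor->CopyFromCpu(data.data());

  predictor->Run();

  auto out_names = predictor->GetOutputNames();
  auto out_tensor = predictor->GetOutputHandle(out_names[0]);
  int numel = 1;
  for (int d : out_tensor->shape()) numel *= d;
  std::vector<float> out(numel);  // 1000 class scores for resnet50
  out_tensor->CopyToCpu(out.data());
}
```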