diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h
index 5fdaad408b8398b3a3186c9b480759d0e7b3136e..f2102dcc9f2902d319790ebae705a6d3fa3a4993 100644
--- a/deploy/cpp/include/config_parser.h
+++ b/deploy/cpp/include/config_parser.h
@@ -42,12 +42,12 @@ class ConfigPaser {
     YAML::Node config;
     config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
 
-    // Get runtime mode : fluid, trt_int8, trt_fp16, trt_fp32
+    // Get runtime mode : fluid, trt_fp16, trt_fp32
     if (config["mode"].IsDefined()) {
       mode_ = config["mode"].as<std::string>();
     } else {
       std::cerr << "Please set mode, "
-                << "support value : fluid/trt_int8/trt_fp16/trt_fp32."
+                << "support value : fluid/trt_fp16/trt_fp32."
                 << std::endl;
       return false;
     }
diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc
index 65d5d4e1f1f68255a66a55c0be73922ba00c9d06..31f2d6d2dc91c1095abc9f8e1edd00f961f8fe1a 100644
--- a/deploy/cpp/src/object_detector.cc
+++ b/deploy/cpp/src/object_detector.cc
@@ -33,7 +33,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
       if (run_mode == "trt_fp16") {
         precision = paddle::AnalysisConfig::Precision::kHalf;
       } else if (run_mode == "trt_int8") {
-        precision = paddle::AnalysisConfig::Precision::kInt8;
+        printf("TensorRT int8 mode is not supported now, "
+               "please use 'trt_fp32' or 'trt_fp16' instead");
       } else {
         if (run_mode != "trt_32") {
           printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'");
@@ -45,7 +46,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
           min_subgraph_size,
           precision,
           false,
-          run_mode == "trt_int8");
+          false);
     }
   } else {
     config.DisableGpu();
diff --git a/deploy/python/README.md b/deploy/python/README.md
index a69b32fe50ed63375c021a738a433260efdb9a05..105f6285228a04afac4369f33a1fa25d27350bf9 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --image_file | Yes |Image file to predict |
 | --video_file | Yes |Video file to predict |
 | --use_gpu |No|Whether to use GPU; default is False|
-| --run_mode |No|When using GPU, defaults to fluid; options: (fluid/trt_fp32/trt_fp16/trt_int8)|
+| --run_mode |No|When using GPU, defaults to fluid; options: (fluid/trt_fp32/trt_fp16)|
 | --threshold |No|Prediction score threshold; default is 0.5|
 | --output_dir |No|Root directory for saving visualized results; default is output/|
 
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 8e3bd82b4c372f4340a1206e2cee4a7bff9ae261..77d10bf4e41a5fc0c4fc7599e9913a7ede8ffbee 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -318,8 +318,10 @@ def load_predictor(model_dir,
         raise ValueError(
             "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
             .format(run_mode, use_gpu))
+    if run_mode == 'trt_int8':
+        raise ValueError("TensorRT int8 mode is not supported now, "
+                         "please use trt_fp32 or trt_fp16 instead.")
     precision_map = {
-        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
         'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
         'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
     }
@@ -341,7 +343,7 @@ def load_predictor(model_dir,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[run_mode],
             use_static=False,
-            use_calib_mode=run_mode == 'trt_int8')
+            use_calib_mode=False)
 
     # disable print log when predict
     config.disable_glog_info()
@@ -482,8 +484,6 @@ class Detector():
         t1 = time.time()
         self.predictor.zero_copy_run()
         t2 = time.time()
-        ms = (t2 - t1) * 1000.0
-        print("Inference: {} ms per batch image".format(ms))
 
         output_names = self.predictor.get_output_names()
         boxes_tensor = self.predictor.get_output_tensor(output_names[0])
@@ -491,6 +491,10 @@ class Detector():
         if self.config.mask_resolution is not None:
             masks_tensor = self.predictor.get_output_tensor(output_names[1])
             np_masks = masks_tensor.copy_to_cpu()
+
+        ms = (t2 - t1) * 1000.0
+        print("Inference: {} ms per batch image".format(ms))
+
         results = self.postprocess(
             np_boxes, np_masks, im_info, threshold=threshold)
         return results
@@ -556,7 +560,7 @@ if __name__ == '__main__':
         "--run_mode",
         type=str,
         default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+        help="mode of running(fluid/trt_fp32/trt_fp16)")
     parser.add_argument(
         "--use_gpu", default=False, help="Whether to predict with GPU.")
     parser.add_argument(