Unverified commit 0ce6b7e1, authored by wangguanzhong, committed by GitHub

remove trt_int8 (#708)

Parent: aa217a9f
@@ -42,12 +42,12 @@ class ConfigPaser {
     YAML::Node config;
     config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
-    // Get runtime mode : fluid, trt_int8, trt_fp16, trt_fp32
+    // Get runtime mode : fluid, trt_fp16, trt_fp32
     if (config["mode"].IsDefined()) {
       mode_ = config["mode"].as<std::string>();
     } else {
       std::cerr << "Please set mode, "
-                << "support value : fluid/trt_int8/trt_fp16/trt_fp32."
+                << "supported values : fluid/trt_fp16/trt_fp32."
                 << std::endl;
       return false;
     }
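The hunk above narrows the runtime modes that ConfigPaser accepts from the exported YAML file to fluid, trt_fp16, and trt_fp32. For reference, a minimal Python sketch of the same check, assuming PyYAML; the config file name `infer_cfg.yml` is illustrative, since the C++ side receives the name as an argument:

```python
# Minimal sketch of the runtime-mode check in Python, assuming PyYAML is
# installed; the config file name is illustrative (ConfigPaser takes it
# as an argument on the C++ side).
import os
import yaml

def load_run_mode(model_dir, cfg="infer_cfg.yml"):
    with open(os.path.join(model_dir, cfg)) as f:
        config = yaml.safe_load(f)
    if "mode" not in config:
        raise ValueError(
            "Please set mode, supported values: fluid/trt_fp32/trt_fp16.")
    return config["mode"]
```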
@@ -33,7 +33,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
     if (run_mode == "trt_fp16") {
       precision = paddle::AnalysisConfig::Precision::kHalf;
     } else if (run_mode == "trt_int8") {
-      precision = paddle::AnalysisConfig::Precision::kInt8;
+      printf("TensorRT int8 mode is not supported now, "
+             "please use 'trt_fp32' or 'trt_fp16' instead\n");
     } else {
       if (run_mode != "trt_fp32") {
         printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'\n");
@@ -45,7 +46,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
         min_subgraph_size,
         precision,
         false,
-        run_mode == "trt_int8");
+        false);
   }
 } else {
   config.DisableGpu();
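The trailing positional arguments in the C++ EnableTensorRtEngine call above are easier to read against the Python binding, where the same parameters are passed by keyword. A sketch, assuming the Paddle 1.x fluid.core.AnalysisConfig API; the paths and size values are placeholders:

```python
# Sketch of the equivalent engine setup through the Python binding, assuming
# the Paddle 1.x fluid.core.AnalysisConfig API; paths and sizes are placeholders.
from paddle import fluid

config = fluid.core.AnalysisConfig('/path/to/__model__', '/path/to/__params__')
config.enable_use_gpu(100, 0)      # initial GPU memory pool (MB), device id
config.enable_tensorrt_engine(
    workspace_size=1 << 10,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=fluid.core.AnalysisConfig.Precision.Half,
    use_static=False,              # the literal `false` in the C++ call
    use_calib_mode=False)          # previously run_mode == "trt_int8"
```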
@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --image_file | Yes | Path to the input image |
 | --video_file | Yes | Path to the input video |
 | --use_gpu | No | Whether to use the GPU; default is False |
-| --run_mode | No | Run mode when using the GPU; default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+| --run_mode | No | Run mode when using the GPU; default is fluid, options: fluid/trt_fp32/trt_fp16 |
 | --threshold | No | Score threshold for predictions; default is 0.5 |
 | --output_dir | No | Root directory for saving visualized results; default is output/ |
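For example, a TensorRT FP16 run with the flags above (paths are placeholders, as in the command at the top of this hunk):

```
python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/image.jpg --use_gpu=True --run_mode=trt_fp16
```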
@@ -318,8 +318,10 @@ def load_predictor(model_dir,
         raise ValueError(
             "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
             .format(run_mode, use_gpu))
+    if run_mode == 'trt_int8':
+        raise ValueError("TensorRT int8 mode is not supported now, "
+                         "please use trt_fp32 or trt_fp16 instead.")
     precision_map = {
-        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
         'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
         'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
     }
@@ -341,7 +343,7 @@ def load_predictor(model_dir,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[run_mode],
             use_static=False,
-            use_calib_mode=run_mode == 'trt_int8')
+            use_calib_mode=False)
     # disable print log when predict
     config.disable_glog_info()
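Once load_predictor has configured the AnalysisConfig, the predictor is created from it and fed through the zero-copy tensor API whose outputs the Detector hunks below fetch. A minimal sketch, where `im` (a preprocessed float32 numpy array) is an illustrative name:

```python
# Minimal sketch of creating the predictor and feeding one input, assuming
# `config` was built as in load_predictor and `im` is a preprocessed array.
predictor = fluid.core.create_paddle_predictor(config)

input_names = predictor.get_input_names()
im_tensor = predictor.get_input_tensor(input_names[0])
im_tensor.copy_from_cpu(im)   # zero-copy input: write straight into the tensor
predictor.zero_copy_run()
```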
@@ -482,8 +484,6 @@ class Detector():
         t1 = time.time()
         self.predictor.zero_copy_run()
         t2 = time.time()
-        ms = (t2 - t1) * 1000.0
-        print("Inference: {} ms per batch image".format(ms))
         output_names = self.predictor.get_output_names()
         boxes_tensor = self.predictor.get_output_tensor(output_names[0])
@@ -491,6 +491,10 @@ class Detector():
         if self.config.mask_resolution is not None:
             masks_tensor = self.predictor.get_output_tensor(output_names[1])
             np_masks = masks_tensor.copy_to_cpu()
+
+        ms = (t2 - t1) * 1000.0
+        print("Inference: {} ms per batch image".format(ms))
+
         results = self.postprocess(
             np_boxes, np_masks, im_info, threshold=threshold)
         return results
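The hunk above only moves the per-batch timing print so it comes after the output fetch. When benchmarking TensorRT modes, it is also common to add warm-up iterations and average over repeats, since the first runs include one-time engine build and autotuning costs; a sketch, assuming a predictor created as in load_predictor:

```python
import time

def benchmark(predictor, warmup=10, repeats=100):
    # Warm-up runs absorb one-time costs (engine build, autotuning).
    for _ in range(warmup):
        predictor.zero_copy_run()
    start = time.time()
    for _ in range(repeats):
        predictor.zero_copy_run()
    ms = (time.time() - start) * 1000.0 / repeats
    print("Inference: {} ms per batch image".format(ms))
```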
@@ -556,7 +560,7 @@ if __name__ == '__main__':
         "--run_mode",
         type=str,
         default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+        help="mode of running(fluid/trt_fp32/trt_fp16)")
     parser.add_argument(
         "--use_gpu", default=False, help="Whether to predict with GPU.")
     parser.add_argument(
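One caveat with the --use_gpu argument above: it has no type converter, so a command-line value such as --use_gpu=False reaches the program as the non-empty, hence truthy, string 'False'. A common fix is an explicit converter; str2bool below is a name introduced here for illustration, not part of the file:

```python
def str2bool(v):
    # bool('False') is True, because any non-empty string is truthy;
    # map the accepted spellings explicitly instead.
    return str(v).lower() in ('true', 't', '1', 'yes', 'y')

parser.add_argument(
    "--use_gpu",
    type=str2bool,
    default=False,
    help="Whether to predict with GPU.")
```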