supplement trt_int8 function (#2619)

133375eb · Guanghua Yu · GitHub · a718694c · 133375eb · 133375eb
14 changed files
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -158,7 +158,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 ｜
 | --output_dir | 输出图片所在的文件夹, 默认为output ｜


--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -102,7 +102,7 @@ make
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 ｜
 | --output_dir | 输出图片所在的文件夹, 默认为output ｜


--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -97,7 +97,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |


--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -37,7 +37,7 @@ DEFINE_string(image_path, "", "Path of input image");
 DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_camera, false, "Use camera or not");
-DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
+DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
 DEFINE_int32(camera_id, -1, "Device id of camera to predict");
 DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark");

--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -43,7 +43,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1(表示不使用摄像头预测，可设置为：0 - (摄像头数目-1) )，预测过程中在可视化界面按`q`退出输出预测结果到：output/output.mp4|
 | --use_gpu |No|是否GPU，默认为False|
-| --run_mode |No|使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode |No|使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --threshold |No|预测得分的阈值，默认为0.5|
 | --output_dir |No|可视化结果保存的根目录，默认为output/|
 | --run_benchmark |No|是否运行benchmark，同时需指定--image_file|

--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -321,7 +321,7 @@ def load_predictor(model_dir,
    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
-        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
+        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
@@ -335,11 +335,6 @@ def load_predictor(model_dir,
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
-    if run_mode == 'trt_int8' and not os.path.exists(
-            os.path.join(model_dir, '_opt_cache')):
-        raise ValueError(
-            "TensorRT int8 must calibration first, and model_dir must has _opt_cache dir"
-        )
    use_calib_mode = True if run_mode == 'trt_int8' else False
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
@@ -512,7 +507,7 @@ if __name__ == '__main__':
        "--run_mode",
        type=str,
        default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16)")
+        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
    parser.add_argument(
        "--use_gpu",
        type=ast.literal_eval,

--- a/deploy/python/trt_int8_calib.py
+++ b/deploy/python/trt_int8_calib.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import argparse
-import time
-import yaml
-import ast
-from functools import reduce
-
-from PIL import Image
-import cv2
-import numpy as np
-import glob
-import paddle
-from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride
-from visualize import visualize_box_mask
-from paddle.inference import Config
-from paddle.inference import create_predictor
-
-# Global dictionary
-SUPPORT_MODELS = {
-    'YOLO',
-    'RCNN',
-    'SSD',
-    'FCOS',
-    'SOLOv2',
-    'TTFNet',
-}
-
-
-class Detector(object):
-    """
-    Args:
-        config (object): config of model, defined by `Config(model_dir)`
-        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
-    """
-
-    def __init__(self, pred_config, model_dir, use_gpu=False):
-        self.pred_config = pred_config
-        self.predictor = load_predictor(
-            model_dir,
-            min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu)
-
-    def preprocess(self, im):
-        preprocess_ops = []
-        for op_info in self.pred_config.preprocess_infos:
-            new_op_info = op_info.copy()
-            op_type = new_op_info.pop('type')
-            preprocess_ops.append(eval(op_type)(**new_op_info))
-        im, im_info = preprocess(im, preprocess_ops,
-                                 self.pred_config.input_shape)
-        inputs = create_inputs(im, im_info)
-        return inputs
-
-    def postprocess(self, np_boxes, np_masks, inputs, threshold=0.5):
-        # postprocess output of predictor
-        results = {}
-        if self.pred_config.arch in ['Face']:
-            h, w = inputs['im_shape']
-            scale_y, scale_x = inputs['scale_factor']
-            w, h = float(h) / scale_y, float(w) / scale_x
-            np_boxes[:, 2] *= h
-            np_boxes[:, 3] *= w
-            np_boxes[:, 4] *= h
-            np_boxes[:, 5] *= w
-        results['boxes'] = np_boxes
-        if np_masks is not None:
-            results['masks'] = np_masks
-        return results
-
-    def predict(self,
-                image,
-                threshold=0.5,
-                warmup=0,
-                repeats=1,
-                run_benchmark=False):
-        '''
-        Args:
-            image (str/np.ndarray): path of image/ np.ndarray read by cv2
-            threshold (float): threshold of predicted box' score
-        Returns:
-            results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
-                            matix element:[class, score, x_min, y_min, x_max, y_max]
-                            MaskRCNN's results include 'masks': np.ndarray:
-                            shape: [N, im_h, im_w]
-        '''
-        inputs = self.preprocess(image)
-        np_boxes, np_masks = None, None
-        input_names = self.predictor.get_input_names()
-        for i in range(len(input_names)):
-            input_tensor = self.predictor.get_input_handle(input_names[i])
-            input_tensor.copy_from_cpu(inputs[input_names[i]])
-
-        for i in range(warmup):
-            self.predictor.run()
-            output_names = self.predictor.get_output_names()
-            boxes_tensor = self.predictor.get_output_handle(output_names[0])
-            np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask:
-                masks_tensor = self.predictor.get_output_handle(output_names[2])
-                np_masks = masks_tensor.copy_to_cpu()
-
-        t1 = time.time()
-        for i in range(repeats):
-            self.predictor.run()
-            output_names = self.predictor.get_output_names()
-            boxes_tensor = self.predictor.get_output_handle(output_names[0])
-            np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask:
-                masks_tensor = self.predictor.get_output_handle(output_names[2])
-                np_masks = masks_tensor.copy_to_cpu()
-        t2 = time.time()
-        ms = (t2 - t1) * 1000.0 / repeats
-        print("Inference: {} ms per batch image".format(ms))
-
-        # do not perform postprocess in benchmark mode
-        results = []
-        if not run_benchmark:
-            if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
-                print('[WARNNING] No object detected.')
-                results = {'boxes': np.array([])}
-            else:
-                results = self.postprocess(
-                    np_boxes, np_masks, inputs, threshold=threshold)
-
-        return results
-
-
-def create_inputs(im, im_info):
-    """generate input for different model type
-    Args:
-        im (np.ndarray): image (np.ndarray)
-        im_info (dict): info of image
-        model_arch (str): model type
-    Returns:
-        inputs (dict): input of model
-    """
-    inputs = {}
-    inputs['image'] = np.array((im, )).astype('float32')
-    inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
-    inputs['scale_factor'] = np.array(
-        (im_info['scale_factor'], )).astype('float32')
-
-    return inputs
-
-
-class PredictConfig():
-    """set config of preprocess, postprocess and visualize
-    Args:
-        model_dir (str): root path of model.yml
-    """
-
-    def __init__(self, model_dir):
-        # parsing Yaml config for Preprocess
-        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
-        with open(deploy_file) as f:
-            yml_conf = yaml.safe_load(f)
-        self.check_model(yml_conf)
-        self.arch = yml_conf['arch']
-        self.preprocess_infos = yml_conf['Preprocess']
-        self.min_subgraph_size = yml_conf['min_subgraph_size']
-        self.labels = yml_conf['label_list']
-        self.mask = False
-        if 'mask' in yml_conf:
-            self.mask = yml_conf['mask']
-        self.input_shape = yml_conf['image_shape']
-        self.print_config()
-
-    def check_model(self, yml_conf):
-        """
-        Raises:
-            ValueError: loaded model not in supported model type 
-        """
-        for support_model in SUPPORT_MODELS:
-            if support_model in yml_conf['arch']:
-                return True
-        raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
-            'arch'], SUPPORT_MODELS))
-
-    def print_config(self):
-        print('-----------  Model Configuration -----------')
-        print('%s: %s' % ('Model Arch', self.arch))
-        print('%s: ' % ('Transform Order'))
-        for op_info in self.preprocess_infos:
-            print('--%s: %s' % ('transform op', op_info['type']))
-        print('--------------------------------------------')
-
-
-def load_predictor(model_dir, batch_size=1, use_gpu=False, min_subgraph_size=3):
-    """set AnalysisConfig, generate AnalysisPredictor
-    Args:
-        model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
-    Returns:
-        predictor (PaddlePredictor): AnalysisPredictor
-    Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
-    """
-    run_mode = 'trt_int8'
-    if not use_gpu and not run_mode == 'fluid':
-        raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
-    config = Config(
-        os.path.join(model_dir, 'model.pdmodel'),
-        os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
-        # initial GPU memory(M), device ID
-        config.enable_use_gpu(200, 0)
-        # optimize graph and fuse op
-        config.switch_ir_optim(True)
-    else:
-        config.disable_gpu()
-
-    if run_mode in precision_map.keys():
-        config.enable_tensorrt_engine(
-            workspace_size=1 << 10,
-            max_batch_size=batch_size,
-            min_subgraph_size=min_subgraph_size,
-            precision_mode=precision_map[run_mode],
-            use_static=False,
-            use_calib_mode=True)
-
-    # disable print log when predict
-    config.disable_glog_info()
-    # enable shared memory
-    config.enable_memory_optim()
-    # disable feed, fetch OP, needed by zero_copy_run
-    config.switch_use_feed_fetch_ops(False)
-    predictor = create_predictor(config)
-    return predictor
-
-
-def print_arguments(args):
-    print('-----------  Running Arguments -----------')
-    for arg, value in sorted(vars(args).items()):
-        print('%s: %s' % (arg, value))
-    print('------------------------------------------')
-
-
-def predict_image_dir(detector):
-    for image_file in glob.glob(FLAGS.image_dir + '/*.jpg'):
-        print('image_file is', image_file)
-        results = detector.predict(image_file, threshold=0.5)
-
-
-def main():
-    pred_config = PredictConfig(FLAGS.model_dir)
-    detector = Detector(pred_config, FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
-    # predict from image
-    if FLAGS.image_dir != '':
-        predict_image_dir(detector)
-
-
-if __name__ == '__main__':
-    paddle.enable_static()
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--model_dir",
-        type=str,
-        default=None,
-        help=("Directory include:'model.pdiparams', 'model.pdmodel', "
-              "'infer_cfg.yml', created by tools/export_model.py."),
-        required=True)
-    parser.add_argument(
-        "--image_dir", type=str, default='', help="Directory of image file.")
-    parser.add_argument(
-        "--use_gpu",
-        type=ast.literal_eval,
-        default=False,
-        help="Whether to predict with GPU.")
-    print('err?')
-    parser.add_argument(
-        "--output_dir",
-        type=str,
-        default="output",
-        help="Directory of output visualization files.")
-    FLAGS = parser.parse_args()
-    print_arguments(FLAGS)
-
-    main()
--- a/static/deploy/cpp/docs/Jetson_build.md
+++ b/static/deploy/cpp/docs/Jetson_build.md
@@ -155,7 +155,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 ｜
 | --output_dir | 输出图片所在的文件夹, 默认为output ｜


--- a/static/deploy/cpp/docs/linux_build.md
+++ b/static/deploy/cpp/docs/linux_build.md
@@ -102,7 +102,7 @@ make
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 ｜
 | --output_dir | 输出图片所在的文件夹, 默认为output ｜


--- a/static/deploy/cpp/docs/windows_vs2019_build.md
+++ b/static/deploy/cpp/docs/windows_vs2019_build.md
@@ -97,7 +97,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1（表示不使用摄像头预测）|
 | --use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id  |  指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode | 使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |


--- a/static/deploy/cpp/src/main.cc
+++ b/static/deploy/cpp/src/main.cc
@@ -199,8 +199,8 @@ int main(int argc, char** argv) {
    return -1;
  }
  if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32"
-      || FLAGS_run_mode == "trt_fp16")) {
-    std::cout << "run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'.";
+      || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
+    std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
    return -1;
  }


--- a/static/deploy/cpp/src/object_detector.cc
+++ b/static/deploy/cpp/src/object_detector.cc
@@ -32,17 +32,16 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
  config.SetModel(prog_file, params_file);
  if (use_gpu) {
    config.EnableUseGpu(100, gpu_id);
+    config.SwitchIrOptim(true);
    if (run_mode != "fluid") {
      auto precision = paddle::AnalysisConfig::Precision::kFloat32;
      if (run_mode == "trt_fp16") {
        precision = paddle::AnalysisConfig::Precision::kHalf;
      } else if (run_mode == "trt_int8") {
-        printf("TensorRT int8 mode is not supported now, "
-               "please use 'trt_fp32' or 'trt_fp16' instead");
+        precision = paddle::AnalysisConfig::Precision::kInt8;
+        use_calib_mode = true;
      } else {
-        if (run_mode != "trt_fp32") {
-          printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'");
-        }
+        if (run_mode != "trt_fp32") printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");  // trt_fp32 is valid: it keeps the kFloat32 default
      }
      config.EnableTensorRtEngine(
          1 << 10,
@@ -50,7 +49,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
          min_subgraph_size,
          precision,
          false,
-          false);
+          use_calib_mode);
   }
  } else {
    config.DisableGpu();

--- a/static/deploy/python/README.md
+++ b/static/deploy/python/README.md
@@ -46,7 +46,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID，默认为-1(表示不使用摄像头预测，可设置为：0 - (摄像头数目-1) )，预测过程中在可视化界面按`q`退出输出预测结果到：output/output.mp4|
 | --use_gpu |No|是否GPU，默认为False|
-| --run_mode |No|使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16）|
+| --run_mode |No|使用GPU时，默认为fluid, 可选（fluid/trt_fp32/trt_fp16/trt_int8）|
 | --threshold |No|预测得分的阈值，默认为0.5|
 | --output_dir |No|可视化结果保存的根目录，默认为output/|
 | --run_benchmark |No|是否运行benchmark，同时需指定--image_file|

--- a/static/deploy/python/infer.py
+++ b/static/deploy/python/infer.py
@@ -393,9 +393,7 @@ def load_predictor(model_dir,
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
-    if run_mode == 'trt_int8':
-        raise ValueError("TensorRT int8 mode is not supported now, "
-                         "please use trt_fp32 or trt_fp16 instead.")
+    use_calib_mode = True if run_mode == 'trt_int8' else False
    precision_map = {
        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
        'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
@@ -419,7 +417,7 @@ def load_predictor(model_dir,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
-            use_calib_mode=False)
+            use_calib_mode=use_calib_mode)

    # disable print log when predict
    config.disable_glog_info()
@@ -574,7 +572,7 @@ if __name__ == '__main__':
        "--run_mode",
        type=str,
        default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16)")
+        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
    parser.add_argument(
        "--use_gpu",
        type=ast.literal_eval,