From df3c8cd722ace54005d0d7d299585831cfed430b Mon Sep 17 00:00:00 2001
From: channings
Date: Mon, 11 May 2020 15:15:06 +0800
Subject: [PATCH] add runtime log & script support for setting run_mode (#630)

* add runtime log & script support for setting run_mode

* update code
---
 deploy/python/README.md |  7 +++++--
 deploy/python/infer.py  | 38 ++++++++++++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/deploy/python/README.md b/deploy/python/README.md
index 2206d454e..e44de8722 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -1,7 +1,7 @@
 ## PaddleDetection Python inference deployment
 This tutorial uses AnalysisPredictor to run high-performance inference on an [exported model](../../docs/advanced_tutorials/inference/EXPORT_MODEL.md).
-The inference engine and the training engine in PaddlePaddle use different underlying optimizations; the two inference options are listed below. Executor supports both training and inference, while AnalysisPredictor is optimized specifically for inference: it is the Python interface to the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html), applying multiple graph optimizations to the model and reducing unnecessary memory copies. If users have high performance requirements when deploying a trained model, so we provide an inference script that is independent of PaddleDetection for direct integration.
+The inference engine and the training engine in PaddlePaddle use different underlying optimizations; the two inference options are listed below. Executor supports both training and inference, while AnalysisPredictor is optimized specifically for inference: it is the Python interface to the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html), applying multiple graph optimizations to the model and reducing unnecessary memory copies. For users with high performance requirements when deploying a trained model, we provide an inference script that is independent of PaddleDetection for direct integration.
 - Executor: [Executor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/executor.html#executor)
 - AnalysisPredictor: [AnalysisPredictor](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/python_infer_cn.html#analysispredictor)
@@ -40,10 +40,13 @@
 python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --image_file | Yes | Image to run prediction on |
 | --video_file | Yes | Video to run prediction on |
 | --use_gpu | No | Whether to use the GPU; defaults to False |
+| --run_mode | No | Effective when using the GPU; defaults to fluid; options: fluid/trt_fp32/trt_fp16/trt_int8 |
 | --threshold | No | Score threshold for predictions; defaults to 0.5 |
-| --visualize | No | Whether to visualize the results; defaults to False |
 | --output_dir | No | Root directory for saving visualized results; defaults to output/ |
+
+Note:
+
+run_mode: fluid runs AnalysisPredictor at float32 precision; the other values run AnalysisPredictor with TensorRT at the corresponding precision.
 
 ## 3. Deployment performance comparison
 Compare the inference speed of AnalysisPredictor against Executor.
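As context for the `run_mode` option documented above, here is a minimal sketch, not part of this patch, of calling the patched `Detector` class directly from Python; it assumes the repository root is on `PYTHONPATH`, and the paths are placeholders:

```python
# Hypothetical usage sketch; Detector is the class patched in
# deploy/python/infer.py below, and the paths are placeholders.
from deploy.python.infer import Detector

detector = Detector(
    '/path/to/models',    # exported model directory with its config file
    use_gpu=True,
    run_mode='trt_fp16')  # fluid / trt_fp32 / trt_fp16 / trt_int8
# predict(image_path, threshold) mirrors the FLAGS used by predict_image()
results = detector.predict('/path/to/image.jpg', 0.5)
```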
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index e2febfd2a..d0181a2d1 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -14,7 +14,9 @@
 import os
 import argparse
+import time
 import yaml
+
 from PIL import Image
 import cv2
 import numpy as np
@@ -279,7 +281,6 @@ class Config():
         self.arch = yml_conf['arch']
         self.preprocess_infos = yml_conf['Preprocess']
         self.use_python_inference = yml_conf['use_python_inference']
-        self.run_mode = yml_conf['mode']
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
         if not yml_conf['with_background']:
@@ -337,7 +338,7 @@ def load_predictor(model_dir,
     if run_mode in precision_map.keys():
         config.enable_tensorrt_engine(
-            workspace_size=1 << 30,
+            workspace_size=1 << 10,
             max_batch_size=batch_size,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[run_mode],
@@ -391,7 +392,11 @@ class Detector():
         use_gpu (bool): whether use gpu
     """
 
-    def __init__(self, model_dir, use_gpu=False, threshold=0.5):
+    def __init__(self,
+                 model_dir,
+                 use_gpu=False,
+                 run_mode='fluid',
+                 threshold=0.5):
         self.config = Config(model_dir)
         if self.config.use_python_inference:
             self.executor, self.program, self.fecth_targets = load_executor(
@@ -399,7 +404,7 @@ class Detector():
         else:
             self.predictor = load_predictor(
                 model_dir,
-                run_mode=self.config.run_mode,
+                run_mode=run_mode,
                 min_subgraph_size=self.config.min_subgraph_size,
                 use_gpu=use_gpu)
         self.preprocess_ops = []
@@ -459,19 +464,29 @@ class Detector():
         inputs, im_info = self.preprocess(image)
         np_boxes, np_masks = None, None
         if self.config.use_python_inference:
+            t1 = time.time()
             outs = self.executor.run(self.program,
                                      feed=inputs,
                                      fetch_list=self.fecth_targets,
                                      return_numpy=False)
+            t2 = time.time()
+            ms = (t2 - t1) * 1000.0
+            print("Inference: {} ms per batch image".format(ms))
+
             np_boxes = np.array(outs[0])
             if self.config.mask_resolution is not None:
-                np_masks = np.arrya(outs[1])
+                np_masks = np.array(outs[1])
         else:
             input_names = self.predictor.get_input_names()
             for i in range(len(inputs)):
                 input_tensor = self.predictor.get_input_tensor(input_names[i])
                 input_tensor.copy_from_cpu(inputs[input_names[i]])
+            t1 = time.time()
             self.predictor.zero_copy_run()
+            t2 = time.time()
+            ms = (t2 - t1) * 1000.0
+            print("Inference: {} ms per batch image".format(ms))
+
             output_names = self.predictor.get_output_names()
             boxes_tensor = self.predictor.get_output_tensor(output_names[0])
             np_boxes = boxes_tensor.copy_to_cpu()
@@ -484,7 +499,8 @@
 
 def predict_image():
-    detector = Detector(FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
+    detector = Detector(
+        FLAGS.model_dir, use_gpu=FLAGS.use_gpu, run_mode=FLAGS.run_mode)
     results = detector.predict(FLAGS.image_file, FLAGS.threshold)
     visualize(
         FLAGS.image_file,
@@ -495,12 +511,13 @@
 
 def predict_video():
-    detector = Detector(FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
+    detector = Detector(
+        FLAGS.model_dir, use_gpu=FLAGS.use_gpu, run_mode=FLAGS.run_mode)
     capture = cv2.VideoCapture(FLAGS.video_file)
     fps = 30
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
     video_name = os.path.split(FLAGS.video_file)[-1]
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
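A note on the timing added in `predict()` above: it reports a single run, and in the `trt_*` modes the first execution also pays the one-off TensorRT engine-build cost. A hedged sketch, not part of this patch, of how the section 3 performance comparison could warm up and average for steadier numbers (`avg_latency_ms` is a hypothetical helper):

```python
import time

def avg_latency_ms(run, warmup=5, iters=50):
    """Average latency of run() in milliseconds, after warmup calls.

    Warmup matters for the trt_* modes, where the first call also
    builds the TensorRT engine.
    """
    for _ in range(warmup):
        run()
    start = time.time()
    for _ in range(iters):
        run()
    return (time.time() - start) * 1000.0 / iters

# e.g. print(avg_latency_ms(detector.predictor.zero_copy_run))
```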
"--video_file", type=str, default='', help="Path of video file.") + parser.add_argument( + "--run_mode", + type=str, + default='fluid', + help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") parser.add_argument( "--use_gpu", default=False, help="Whether to predict with GPU.") parser.add_argument( -- GitLab