fix detection paddle trt infer (#1390)

8c46869c · Guanghua Yu · GitHub · 0e487626 · 8c46869c · 8c46869c
2 changed files
--- a/example/auto_compression/detection/paddle_trt_infer.py
+++ b/example/auto_compression/detection/paddle_trt_infer.py
@@ -80,7 +80,7 @@ def image_preprocess(img_path, target_shape):
    img -= img_mean
    img /= img_std
    scale_factor = np.array([[im_scale_y, im_scale_x]])
-    return img.astype(np.float32), scale_factor
+    return img.astype(np.float32), scale_factor.astype(np.float32)


 def get_color_map_list(num_classes):
@@ -130,7 +130,7 @@ def load_predictor(model_dir,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
-                   trt_min_shape=1,
+                   trt_min_shape=3,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
@@ -215,8 +215,6 @@ def load_predictor(model_dir,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

-    # disable print log when predict
-    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
@@ -233,10 +231,12 @@ def predict_image(predictor,
                  warmup=1,
                  repeats=1,
                  threshold=0.5,
-                  arch='YOLOv5'):
+                  include_nms=True):
    img, scale_factor = image_preprocess(image_file, image_shape)
    inputs = {}
    inputs['image'] = img
+    if include_nms:
+        inputs['scale_factor'] = scale_factor
    input_names = predictor.get_input_names()
    for i in range(len(input_names)):
        input_tensor = predictor.get_input_handle(input_names[i])
@@ -245,7 +245,7 @@ def predict_image(predictor,
    for i in range(warmup):
        predictor.run()

-    np_boxes = None
+    np_boxes, np_boxes_num = None, None
    predict_time = 0.
    time_min = float("inf")
    time_max = float('-inf')
@@ -255,6 +255,9 @@ def predict_image(predictor,
        output_names = predictor.get_output_names()
        boxes_tensor = predictor.get_output_handle(output_names[0])
        np_boxes = boxes_tensor.copy_to_cpu()
+        if include_nms:
+            boxes_num = predictor.get_output_handle(output_names[1])
+            np_boxes_num = boxes_num.copy_to_cpu()
        end_time = time.time()
        timed = end_time - start_time
        time_min = min(time_min, timed)
@@ -265,8 +268,11 @@ def predict_image(predictor,
    print('Inference time(ms): min={}, max={}, avg={}'.format(
        round(time_min * 1000, 2),
        round(time_max * 1000, 1), round(time_avg * 1000, 1)))
-    postprocess = PPYOLOEPostProcess(score_threshold=0.3, nms_threshold=0.6)
-    res = postprocess(np_boxes, scale_factor)
+    if not include_nms:
+        postprocess = PPYOLOEPostProcess(score_threshold=0.3, nms_threshold=0.6)
+        res = postprocess(np_boxes, scale_factor)
+    else:
+        res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
    res_img = draw_box(
        image_file, res['bbox'], CLASS_LABEL, threshold=threshold)
    cv2.imwrite('result.jpg', res_img)
@@ -296,6 +302,11 @@ if __name__ == '__main__':
        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU"
    )
    parser.add_argument('--img_shape', type=int, default=640, help="input_size")
+    parser.add_argument(
+        '--include_nms',
+        type=bool,
+        default=True,
+        help="Whether include nms or not.")
    args = parser.parse_args()

    predictor = load_predictor(
@@ -308,4 +319,5 @@ if __name__ == '__main__':
        args.image_file,
        image_shape=[args.img_shape, args.img_shape],
        warmup=warmup,
-        repeats=repeats)
+        repeats=repeats,
+        include_nms=args.include_nms)
--- a/example/auto_compression/pytorch_yolo_series/README.md
+++ b/example/auto_compression/pytorch_yolo_series/README.md
@@ -147,9 +147,9 @@ python eval.py --config_path=./configs/yolov7_tiny_qat_dis.yaml

 #### 导出至ONNX使用TensorRT部署

-加载`quant_model.onnx`和`calibration.cache`，可以直接使用TensorRT测试脚本进行验证，详细代码可参考[./TensorRT]
+加载`quant_model.onnx`和`calibration.cache`，可以直接使用TensorRT测试脚本进行验证，详细代码可参考[TensorRT部署](./TensorRT)

- 进行测试：
+- Python测试：
 ```shell
 cd TensorRT
 python trt_eval.py --onnx_model_file=output/ONNX/quant_model.onnx \
@@ -158,6 +158,11 @@ python trt_eval.py --onnx_model_file=output/ONNX/quant_model.onnx \
                   --precision_mode=int8
 ```

+- 速度测试
+```shell
+trtexec --onnx=output/ONNX/quant_model.onnx --avgRuns=1000 --workspace=1024 --calib=output/ONNX/calibration.cache --int8
+```
+
 #### Paddle-TensorRT部署
 - C++部署