Unverified commit 6ef0a6ba, authored by Guanghua Yu, committed by GitHub

update detection ACT Infer demo (#1489)

Parent: d2bd1d28
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
import time

import numpy as np
import onnxruntime as ort
import paddle
from ppdet.core.workspace import create, load_config
from ppdet.metrics import COCOMetric

from post_process import PPYOLOEPostProcess

def str2bool(v):
    # argparse's type=bool treats every non-empty string as True,
    # so parse boolean flag values explicitly.
    return str(v).lower() in ('true', 't', 'yes', 'y', '1')


def argsparser():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--reader_config',
        type=str,
        default='configs/picodet_reader.yml',
        help="path of the dataset reader config.",
        required=True)
    parser.add_argument(
        '--model_path',
        type=str,
        default='onnx_file/picodet_s_416_npu_postprocessed.onnx',
        help="path of the ONNX model file.")
    parser.add_argument(
        '--include_post_process',
        type=str2bool,
        default=False,
        help="Whether the ONNX model already includes post-processing.")
    return parser
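
# Example invocation (illustrative; assuming this script is saved as
# onnx_eval.py, with the flags mapping to the parser above):
#   python onnx_eval.py \
#       --reader_config configs/picodet_reader.yml \
#       --model_path onnx_file/picodet_s_416_npu_postprocessed.onnx \
#       --include_post_process False
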
def eval(val_loader, metric, sess):
    inputs_name = [a.name for a in sess.get_inputs()]
    predict_time = 0.0
    time_min = float("inf")
    time_max = float("-inf")
    sample_nums = len(val_loader)
    for batch_id, data in enumerate(val_loader):
        data_all = {k: np.array(v) for k, v in data.items()}
        # Feed only the tensors that the ONNX model declares as inputs.
        data_input = {}
        for k, v in data.items():
            if k in inputs_name:
                data_input[k] = np.array(v)

        start_time = time.time()
        outs = sess.run(None, data_input)
        end_time = time.time()

        timed = end_time - start_time
        time_min = min(time_min, timed)
        time_max = max(time_max, timed)
        predict_time += timed

        res = {}
        if not FLAGS.include_post_process:
            # The model emits raw predictions, so run score filtering
            # and NMS on the host.
            postprocess = PPYOLOEPostProcess(
                score_threshold=0.01, nms_threshold=0.6)
            res = postprocess(np.array(outs[0]), data_all['scale_factor'])
        else:
            # Post-processing is embedded in the model: the 2-D output holds
            # the boxes, the 1-D output holds the box count per image.
            for out in outs:
                v = np.array(out)
                if len(v.shape) > 1:
                    res['bbox'] = v
                else:
                    res['bbox_num'] = v
        metric.update(data_all, res)
        if batch_id % 100 == 0:
            print('Eval iter:', batch_id)

    metric.accumulate()
    metric.log()
    map_res = metric.get_results()
    metric.reset()
    time_avg = predict_time / sample_nums
    print("[Benchmark] Inference time(ms): min={}, max={}, avg={}".format(
        round(time_min * 1000, 2),
        round(time_max * 1000, 2), round(time_avg * 1000, 2)))
    print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
    sys.stdout.flush()
def main():
    reader_cfg = load_config(FLAGS.reader_config)

    dataset = reader_cfg['EvalDataset']
    val_loader = create('EvalReader')(dataset,
                                      reader_cfg['worker_num'],
                                      return_list=True)
    clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
    anno_file = dataset.get_anno()
    metric = COCOMetric(
        anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox')

    providers = ['CPUExecutionProvider']
    sess_options = ort.SessionOptions()
    # Dump the graph produced by ONNX Runtime's offline optimizations,
    # so it can be inspected later.
    sess_options.optimized_model_filepath = "./optimize_model.onnx"
    sess = ort.InferenceSession(
        FLAGS.model_path, providers=providers, sess_options=sess_options)

    eval(val_loader, metric, sess)


if __name__ == '__main__':
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()

    # The DataLoader needs to run on CPU.
    paddle.set_device("cpu")

    main()
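For a quick smoke test of the exported model without the COCO reader, the same ONNX Runtime calls used above can be driven with random data. A minimal sketch, assuming the model has a single float32 image input (the input name and shape are read from the model rather than hard-coded):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(
    "onnx_file/picodet_s_416_npu_postprocessed.onnx",
    providers=['CPUExecutionProvider'])
inp = sess.get_inputs()[0]
# Substitute 1 for any dynamic dimension so a dummy tensor can be built.
shape = [d if isinstance(d, int) else 1 for d in inp.shape]
outs = sess.run(None, {inp.name: np.random.rand(*shape).astype(np.float32)})
print([np.array(o).shape for o in outs])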
@@ -84,6 +84,7 @@ void run(Predictor *predictor, const std::vector<type> &input,
   for (int i = 0; i < FLAGS_repeats; ++i) {
     auto input_names = predictor->GetInputNames();
     auto input_t = predictor->GetInputHandle(input_names[0]);
+    input_t->Reshape(input_shape);
     input_t->CopyFromCpu(input.data());
@@ -92,30 +93,34 @@
     auto output_names = predictor->GetOutputNames();
     auto output_t = predictor->GetOutputHandle(output_names[0]);
     std::vector<int> output_shape = output_t->shape();
-    output_t->ShareExternalData<type>(out_data, out_shape, paddle_infer::PlaceType::kGPU);
+    output_t->CopyToCpu(out_data);
   }
   LOG(INFO) << "[" << FLAGS_run_mode << " bs-" << FLAGS_batch_size
             << " ] run avg time is " << time_diff(st, time()) / FLAGS_repeats << " ms";
 }
 
-int main(int argc, char *argv[]) {
+int main(int argc, char *argv[])
+{
   google::ParseCommandLineFlags(&argc, &argv, true);
   auto predictor = InitPredictor();
+  std::cout << "====== Use float instead of FP16 data ======" << std::endl;
+  std::vector<float> input_data(FLAGS_batch_size * 3 * 640 * 640, float(1.0));
   std::vector<int> input_shape = {FLAGS_batch_size, 3, 640, 640};
-  // float16
-  using dtype = float16;
-  std::vector<dtype> input_data(FLAGS_batch_size * 3 * 640 * 640, dtype(1.0));
   int out_box_shape = 25200;
   if (FLAGS_arch == "YOLOv6") {
     out_box_shape = 8400;
   }
-  dtype *out_data;
+  float *out_data;
+  std::vector<int> out_shape{FLAGS_batch_size, 1, out_box_shape, 85};
   int out_data_size = FLAGS_batch_size * out_box_shape * 85;
   // Only use Pinned mem for D2H.
   cudaHostAlloc((void **)&out_data, sizeof(float) * out_data_size, cudaHostAllocMapped);
-  std::vector<int> out_shape{FLAGS_batch_size, 1, out_box_shape, 85};
-  run<dtype>(predictor.get(), input_data, input_shape, out_data, out_shape);
+  run<float>(predictor.get(), input_data, input_shape, out_data, out_shape);
   return 0;
-}
+}
\ No newline at end of file
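The main() changes above swap the float16 host buffers for plain float: the input vector is now filled with float32 values and the output is copied back with CopyToCpu instead of sharing a GPU buffer, so the host code no longer needs a float16 type at all; any FP16 conversion is left to the inference engine. As a rough illustration of what this means for the host-side buffers (a numpy sketch, not part of the demo; sizes follow the 1x3x640x640 input used above):

import numpy as np

batch_size = 1
# Former demo: half-precision host buffer, 2 bytes per element.
fp16_input = np.ones((batch_size, 3, 640, 640), dtype=np.float16)
# Updated demo: single-precision host buffer, 4 bytes per element.
fp32_input = np.ones((batch_size, 3, 640, 640), dtype=np.float32)
print(fp16_input.nbytes, fp32_input.nbytes)  # 2457600 4915200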