From f00a4c00101415809c2e65e07f28eab452cdbcf4 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 9 Dec 2021 12:14:17 +0800 Subject: [PATCH] [cherry-pick] fix timer in deploy (#4857) * fix timer in deploy * fix mot_keypoint deploy --- deploy/pptracking/python/mot_jde_infer.py | 39 ++++--- deploy/pptracking/python/mot_sde_infer.py | 119 +++++++++++--------- deploy/python/det_keypoint_unite_infer.py | 12 ++- deploy/python/infer.py | 110 +++++++++---------- deploy/python/keypoint_infer.py | 48 ++++----- deploy/python/mot_jde_infer.py | 52 +++++---- deploy/python/mot_keypoint_unite_infer.py | 25 +++-- deploy/python/mot_sde_infer.py | 126 +++++++++++++--------- 8 files changed, 308 insertions(+), 223 deletions(-) diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py index 7eb0d8b4c..bcad3a241 100644 --- a/deploy/pptracking/python/mot_jde_infer.py +++ b/deploy/pptracking/python/mot_jde_infer.py @@ -121,32 +121,32 @@ class JDE_Detector(Detector): online_scores[cls_id].append(tscore) return online_tlwhs, online_scores, online_ids - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list[str]): path of images, only support one image path (batch_size=1) in tracking model threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: online_tlwhs, online_scores, online_ids (dict[np.array]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() pred_dets, pred_embs = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - pred_dets = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -154,14 +154,16 @@ class JDE_Detector(Detector): pred_dets = boxes_tensor.copy_to_cpu() embs_tensor = self.predictor.get_output_handle(output_names[1]) pred_embs = embs_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess( pred_dets, pred_embs, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -175,7 +177,12 @@ def predict_image(detector, image_list): for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + 
[img_file], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py index 25f2df425..1a3110a46 100644 --- a/deploy/pptracking/python/mot_sde_infer.py +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -154,8 +154,8 @@ class SDE_Detector(Detector): ori_image_shape, threshold=0.5, scaled=False, - warmup=0, - repeats=1): + repeats=1, + add_timer=True): ''' Args: image_path (list[str]): path of images, only support one image path @@ -164,43 +164,46 @@ class SDE_Detector(Detector): threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction + Returns: pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_path) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes = boxes_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess if len(boxes) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) else: pred_dets, pred_xyxys = self.postprocess( boxes, ori_image_shape, threshold, inputs, scaled=scaled) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -284,8 +287,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ori_image_shape, threshold=0.5, scaled=False, - warmup=0, - repeats=1): + repeats=1, + add_timer=True): ''' Args: image_path (list[str]): path of images, only support one image path @@ -294,27 +297,26 @@ class SDE_DetectorPicoDet(DetectorPicoDet): threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_path) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -328,9 +330,11 @@ class SDE_DetectorPicoDet(DetectorPicoDet): np_boxes_list.append( self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess self.picodet_postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -346,8 +350,9 @@ class SDE_DetectorPicoDet(DetectorPicoDet): else: pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -503,42 +508,43 @@ class SDE_ReID(object): def predict(self, crops, pred_dets, - warmup=0, repeats=1, + add_timer=True, MTMCT=False, frame_id=0, seq_name=''): - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(crops) - self.det_times.preprocess_time_s.end() - input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - feature_tensor = self.predictor.get_output_handle(output_names[0]) - pred_embs = feature_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() feature_tensor = self.predictor.get_output_handle(output_names[0]) pred_embs = feature_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess if MTMCT == False: tracking_outs = self.postprocess(pred_dets, pred_embs) else: tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs, frame_id, seq_name) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + 
self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return tracking_outs @@ -549,13 +555,23 @@ def predict_image(detector, reid_model, image_list): frame = cv2.imread(img_file) ori_image_shape = list(frame.shape[:2]) if FLAGS.run_benchmark: + # warmup pred_dets, pred_xyxys = detector.predict( [img_file], ori_image_shape, FLAGS.threshold, FLAGS.scaled, - warmup=10, - repeats=10) + repeats=10, + add_timer=False) + # run benchmark + pred_dets, pred_xyxys = detector.predict( + [img_file], + ori_image_shape, + FLAGS.threshold, + FLAGS.scaled, + repeats=10, + add_timer=True) + cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -574,8 +590,13 @@ def predict_image(detector, reid_model, image_list): crops = reid_model.get_crops(pred_xyxys, frame) if FLAGS.run_benchmark: + # warmup tracking_outs = reid_model.predict( - crops, pred_dets, warmup=10, repeats=10) + crops, pred_dets, repeats=10, add_timer=False) + # run benchmark + tracking_outs = reid_model.predict( + crops, pred_dets, repeats=10, add_timer=True) + else: tracking_outs = reid_model.predict(crops, pred_dets) diff --git a/deploy/python/det_keypoint_unite_infer.py b/deploy/python/det_keypoint_unite_infer.py index 5be63a72b..a695a9f0f 100644 --- a/deploy/python/det_keypoint_unite_infer.py +++ b/deploy/python/det_keypoint_unite_infer.py @@ -68,8 +68,12 @@ def predict_with_given_det(image, det_res, keypoint_detector, batch_images = rec_images[start_index:end_index] batch_records = np.array(records[start_index:end_index]) if run_benchmark: + # warmup keypoint_result = keypoint_detector.predict( - batch_images, keypoint_threshold, warmup=10, repeats=10) + batch_images, keypoint_threshold, repeats=10, add_timer=False) + # run benchmark + keypoint_result = keypoint_detector.predict( + batch_images, keypoint_threshold, repeats=10, add_timer=True) else: keypoint_result = keypoint_detector.predict(batch_images, keypoint_threshold) @@ -100,8 +104,12 @@ def topdown_unite_predict(detector, det_timer.preprocess_time_s.end() if FLAGS.run_benchmark: + # warmup + results = detector.predict( + [image], FLAGS.det_threshold, repeats=10, add_timer=False) + # run benchmark results = detector.predict( - [image], FLAGS.det_threshold, warmup=10, repeats=10) + [image], FLAGS.det_threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 77f509c24..38af04197 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -125,35 +125,33 @@ class Detector(object): results['masks'] = np_masks return results - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: shape: [N, im_h, im_w] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() np_boxes, np_masks = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = 
self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - np_boxes = boxes_tensor.copy_to_cpu() - if self.pred_config.mask: - masks_tensor = self.predictor.get_output_handle(output_names[2]) - np_masks = masks_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -164,9 +162,12 @@ class Detector(object): if self.pred_config.mask: masks_tensor = self.predictor.get_output_handle(output_names[2]) np_masks = masks_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess results = [] if reduce(lambda x, y: x * y, np_boxes.shape) < 6: print('[WARNNING] No object detected.') @@ -174,8 +175,9 @@ class Detector(object): else: results = self.postprocess( np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(image_list) + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(image_list) return results def get_timer(self): @@ -228,36 +230,30 @@ class DetectorSOLOv2(Detector): self.det_times = Timer() self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - def predict(self, image, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (str/np.ndarray): path of image/ np.ndarray read by cv2 threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w] 'cate_label': label of segm, shape:[N] 'cate_score': confidence score of segm, shape:[N] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() np_label, np_score, np_segms = None, None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - np_boxes_num = self.predictor.get_output_handle(output_names[ - 0]).copy_to_cpu() - np_label = self.predictor.get_output_handle(output_names[ - 1]).copy_to_cpu() - np_score = self.predictor.get_output_handle(output_names[ - 2]).copy_to_cpu() - np_segms = self.predictor.get_output_handle(output_names[ - 3]).copy_to_cpu() - self.det_times.inference_time_s.start() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -269,8 +265,9 @@ class DetectorSOLOv2(Detector): 2]).copy_to_cpu() np_segms = self.predictor.get_output_handle(output_names[ 3]).copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 + if 
add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 return dict( segm=np_segms, @@ -325,38 +322,32 @@ class DetectorPicoDet(Detector): self.det_times = Timer() self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - def predict(self, image, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (str/np.ndarray): path of image/ np.ndarray read by cv2 threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - np_score_list.clear() - np_boxes_list.clear() - output_names = self.predictor.get_output_names() - num_outs = int(len(output_names) / 2) - for out_idx in range(num_outs): - np_score_list.append( - self.predictor.get_output_handle(output_names[out_idx]) - .copy_to_cpu()) - np_boxes_list.append( - self.predictor.get_output_handle(output_names[ - out_idx + num_outs]).copy_to_cpu()) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model_prediction for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -370,9 +361,12 @@ class DetectorPicoDet(Detector): np_boxes_list.append( self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + + # postprocess self.postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -380,7 +374,8 @@ class DetectorPicoDet(Detector): strides=self.pred_config.fpn_stride, nms_threshold=self.pred_config.nms['nms_threshold']) np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list) - self.det_times.postprocess_time_s.end() + if add_timer: + self.det_times.postprocess_time_s.end() return dict(boxes=np_boxes, boxes_num=np_boxes_num) @@ -647,8 +642,13 @@ def predict_image(detector, image_list, batch_size=1): end_index = min((i + 1) * batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] if FLAGS.run_benchmark: + # warmup detector.predict( - batch_image_list, FLAGS.threshold, warmup=10, repeats=10) + batch_image_list, FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + batch_image_list, FLAGS.threshold, repeats=10, add_timer=True) + cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py index 6594bdbfd..3f663c81f 100644 --- a/deploy/python/keypoint_infer.py +++ b/deploy/python/keypoint_infer.py @@ -145,41 +145,33 @@ class KeyPoint_Detector(Detector): 
raise ValueError("Unsupported arch: {}, expect {}".format( self.pred_config.arch, KEYPOINT_SUPPORT_MODELS)) - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: shape: [N, im_h, im_w] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) np_boxes, np_masks = None, None input_names = self.predictor.get_input_names() - for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - self.det_times.preprocess_time_s.end() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - np_boxes = boxes_tensor.copy_to_cpu() - if self.pred_config.tagmap: - masks_tensor = self.predictor.get_output_handle(output_names[1]) - heat_k = self.predictor.get_output_handle(output_names[2]) - inds_k = self.predictor.get_output_handle(output_names[3]) - np_masks = [ - masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), - inds_k.copy_to_cpu() - ] + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -193,13 +185,16 @@ class KeyPoint_Detector(Detector): masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), inds_k.copy_to_cpu() ] - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess results = self.postprocess( np_boxes, np_masks, inputs, threshold=threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(image_list) + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(image_list) return results @@ -266,7 +261,12 @@ class PredictConfig_KeyPoint(): def predict_image(detector, image_list): for i, img_file in enumerate(image_list): if FLAGS.run_benchmark: - detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -300,7 +300,7 @@ def predict_video(detector, camera_id): if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name + '.mp4') - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) index = 1 while (1): diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index c7006a7cd..f646d911c 100644 --- a/deploy/python/mot_jde_infer.py +++ 
b/deploy/python/mot_jde_infer.py @@ -120,31 +120,31 @@ class JDE_Detector(Detector): online_scores[cls_id].append(tscore) return online_tlwhs, online_scores, online_ids - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: online_tlwhs, online_scores, online_ids (dict[np.array]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() pred_dets, pred_embs = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - pred_dets = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -152,13 +152,17 @@ class JDE_Detector(Detector): pred_dets = boxes_tensor.copy_to_cpu() embs_tensor = self.predictor.get_output_handle(output_names[1]) pred_embs = embs_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess( pred_dets, pred_embs, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -172,7 +176,12 @@ def predict_image(detector, image_list): for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [frame], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + [frame], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -181,9 +190,14 @@ def predict_image(detector, image_list): else: online_tlwhs, online_scores, online_ids = detector.predict( [frame], FLAGS.threshold) - online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, - online_ids, online_scores, frame_id, - ids2names=ids2names) + online_im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id, + ids2names=ids2names) if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) @@ -211,7 +225,7 @@ def predict_video(detector, camera_id): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer = 
MOTTimer() diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py index f3fda1e6a..70f03db76 100644 --- a/deploy/python/mot_keypoint_unite_infer.py +++ b/deploy/python/mot_keypoint_unite_infer.py @@ -64,8 +64,12 @@ def mot_keypoint_unite_predict_image(mot_model, frame = cv2.imread(img_file) if FLAGS.run_benchmark: + # warmup online_tlwhs, online_scores, online_ids = mot_model.predict( - [frame], FLAGS.mot_threshold, warmup=10, repeats=10) + [frame], FLAGS.mot_threshold, repeats=10, add_timer=False) + # run benchmark + online_tlwhs, online_scores, online_ids = mot_model.predict( + [frame], FLAGS.mot_threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() mot_model.cpu_mem += cm mot_model.gpu_mem += gm @@ -84,13 +88,16 @@ def mot_keypoint_unite_predict_image(mot_model, FLAGS.run_benchmark) else: - warmup = 10 if FLAGS.run_benchmark else 0 + if FLAGS.run_benchmark: + keypoint_results = keypoint_model.predict( + [frame], + FLAGS.keypoint_threshold, + repeats=10, + add_timer=False) + repeats = 10 if FLAGS.run_benchmark else 1 keypoint_results = keypoint_model.predict( - [frame], - FLAGS.keypoint_threshold, - warmup=warmup, - repeats=repeats) + [frame], FLAGS.keypoint_threshold, repeats=repeats) if FLAGS.run_benchmark: cm, gm, gu = get_current_memory_mb() @@ -103,7 +110,7 @@ def mot_keypoint_unite_predict_image(mot_model, keypoint_results, visual_thread=FLAGS.keypoint_threshold, returnimg=True, - ids=online_ids + ids=online_ids[0] if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else None) @@ -144,7 +151,7 @@ def mot_keypoint_unite_predict_video(mot_model, os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer_mot = FPSTimer() @@ -193,7 +200,7 @@ def mot_keypoint_unite_predict_video(mot_model, keypoint_results, visual_thread=FLAGS.keypoint_threshold, returnimg=True, - ids=online_ids + ids=online_ids[0] if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else None) diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index a6af02065..16b9a85f0 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -178,40 +178,43 @@ class SDE_Detector(Detector): return pred_dets, pred_xyxys - def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (np.ndarray): image numpy data - threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: pred_dets (np.ndarray, [N, 6]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes = boxes_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess if len(boxes) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) @@ -223,8 +226,9 @@ class SDE_Detector(Detector): pred_dets, pred_xyxys = self.postprocess( boxes, input_shape, im_shape, scale_factor, threshold, scaled) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -271,7 +275,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): assert batch_size == 1, "The JDE Detector only supports batch size=1 now" self.pred_config = pred_config - def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, threshold): + def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, + threshold): over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] if len(over_thres_idx) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) @@ -299,33 +304,35 @@ class SDE_DetectorPicoDet(DetectorPicoDet): (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) return pred_dets, pred_xyxys - def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (np.ndarray): image numpy data - threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction + Returns: pred_dets (np.ndarray, [N, 6]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction + np_score_list, np_boxes_list = [], [] for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -340,9 +347,12 @@ class SDE_DetectorPicoDet(DetectorPicoDet): self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + + # postprocess self.postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -360,9 +370,10 @@ class SDE_DetectorPicoDet(DetectorPicoDet): scale_factor = inputs['scale_factor'] pred_dets, pred_xyxys = self.postprocess_bboxes( boxes, input_shape, im_shape, scale_factor, threshold) - + if add_timer: + self.det_times.postprocess_time_s.end() return pred_dets, pred_xyxys - + class SDE_ReID(object): def __init__(self, @@ -445,35 +456,36 @@ class SDE_ReID(object): return online_tlwhs, online_scores, online_ids - def predict(self, crops, pred_dets, warmup=0, repeats=1): - self.det_times.preprocess_time_s.start() + def predict(self, crops, pred_dets, repeats=1, add_timer=True): + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(crops) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - feature_tensor = self.predictor.get_output_handle(output_names[0]) - pred_embs = feature_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() feature_tensor = self.predictor.get_output_handle(output_names[0]) pred_embs = feature_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess(pred_dets, pred_embs) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + 
self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -483,8 +495,20 @@ def predict_image(detector, reid_model, image_list): for i, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: + # warmup pred_dets, pred_xyxys = detector.predict( - [frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10) + [frame], + FLAGS.scaled, + FLAGS.threshold, + repeats=10, + add_timer=False) + # run benchmark + pred_dets, pred_xyxys = detector.predict( + [frame], + FLAGS.scaled, + FLAGS.threshold, + repeats=10, + add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -503,8 +527,12 @@ def predict_image(detector, reid_model, image_list): crops = reid_model.get_crops(pred_xyxys, frame) if FLAGS.run_benchmark: + # warmup + online_tlwhs, online_scores, online_ids = reid_model.predict( + crops, pred_dets, repeats=10, add_timer=False) + # run benchmark online_tlwhs, online_scores, online_ids = reid_model.predict( - crops, pred_dets, warmup=10, repeats=10) + crops, pred_dets, repeats=10, add_timer=True) else: online_tlwhs, online_scores, online_ids = reid_model.predict( crops, pred_dets) @@ -538,7 +566,7 @@ def predict_video(detector, reid_model, camera_id): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer = MOTTimer() -- GitLab
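
Note on the calling convention this patch introduces: every predict() above drops the old warmup argument and instead gates its timer bookkeeping on a new add_timer flag, so benchmark callers first warm up with add_timer=False and then run the timed pass with add_timer=True. The snippet below is a minimal, self-contained sketch of that pattern, not the PaddleDetection code itself; the Times, Timer and Detector classes here are simplified stand-ins for the real deploy utilities, and only the control flow mirrors the patch.

# Minimal, self-contained sketch of the add_timer convention used above.
# Times, Timer and Detector are hypothetical stand-ins, NOT the real
# PaddleDetection deploy classes.
import time


class Times(object):
    """Accumulates elapsed seconds for one stage (preprocess/inference/postprocess)."""

    def __init__(self):
        self.total = 0.0
        self._start = None

    def start(self):
        self._start = time.time()

    def end(self, repeats=1):
        # Average over `repeats` so a repeated inference loop counts as one pass.
        self.total += (time.time() - self._start) / repeats


class Timer(object):
    def __init__(self):
        self.preprocess_time_s = Times()
        self.inference_time_s = Times()
        self.postprocess_time_s = Times()
        self.img_num = 0


class Detector(object):
    def __init__(self):
        self.det_times = Timer()

    def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
        # preprocess
        if add_timer:
            self.det_times.preprocess_time_s.start()
        inputs = list(image_list)  # placeholder for real preprocessing
        if add_timer:
            self.det_times.preprocess_time_s.end()
            self.det_times.inference_time_s.start()

        # model prediction
        for _ in range(repeats):
            time.sleep(0.01)  # placeholder for self.predictor.run()
        if add_timer:
            self.det_times.inference_time_s.end(repeats=repeats)
            self.det_times.postprocess_time_s.start()

        # postprocess
        results = {'boxes': []}  # placeholder for real postprocessing
        if add_timer:
            self.det_times.postprocess_time_s.end()
            self.det_times.img_num += len(image_list)
        return results


if __name__ == '__main__':
    detector = Detector()
    # warmup: excluded from det_times
    detector.predict(['demo.jpg'], repeats=10, add_timer=False)
    # run benchmark: included in det_times
    detector.predict(['demo.jpg'], repeats=10, add_timer=True)
    print('images timed:', detector.det_times.img_num)
    print('avg inference time: %.4f s' % detector.det_times.inference_time_s.total)

Gating every timer call on a single flag keeps warmup iterations out of the averaged stage times without duplicating the inference loop, which is what the removed warmup loops used to do.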