From f00a4c00101415809c2e65e07f28eab452cdbcf4 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 9 Dec 2021 12:14:17 +0800 Subject: [PATCH] [cherry-pick] fix timer in deploy (#4857) * fix timer in deploy * fix mot_keypoint deploy --- deploy/pptracking/python/mot_jde_infer.py | 39 ++++--- deploy/pptracking/python/mot_sde_infer.py | 119 +++++++++++--------- deploy/python/det_keypoint_unite_infer.py | 12 ++- deploy/python/infer.py | 110 +++++++++---------- deploy/python/keypoint_infer.py | 48 ++++----- deploy/python/mot_jde_infer.py | 52 +++++---- deploy/python/mot_keypoint_unite_infer.py | 25 +++-- deploy/python/mot_sde_infer.py | 126 +++++++++++++--------- 8 files changed, 308 insertions(+), 223 deletions(-) diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py index 7eb0d8b4c..bcad3a241 100644 --- a/deploy/pptracking/python/mot_jde_infer.py +++ b/deploy/pptracking/python/mot_jde_infer.py @@ -121,32 +121,32 @@ class JDE_Detector(Detector): online_scores[cls_id].append(tscore) return online_tlwhs, online_scores, online_ids - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list[str]): path of images, only support one image path (batch_size=1) in tracking model threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: online_tlwhs, online_scores, online_ids (dict[np.array]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() pred_dets, pred_embs = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - pred_dets = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -154,14 +154,16 @@ class JDE_Detector(Detector): pred_dets = boxes_tensor.copy_to_cpu() embs_tensor = self.predictor.get_output_handle(output_names[1]) pred_embs = embs_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess( pred_dets, pred_embs, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -175,7 +177,12 @@ def predict_image(detector, image_list): for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + 
[img_file], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py index 25f2df425..1a3110a46 100644 --- a/deploy/pptracking/python/mot_sde_infer.py +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -154,8 +154,8 @@ class SDE_Detector(Detector): ori_image_shape, threshold=0.5, scaled=False, - warmup=0, - repeats=1): + repeats=1, + add_timer=True): ''' Args: image_path (list[str]): path of images, only support one image path @@ -164,43 +164,46 @@ class SDE_Detector(Detector): threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction + Returns: pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_path) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes = boxes_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess if len(boxes) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) else: pred_dets, pred_xyxys = self.postprocess( boxes, ori_image_shape, threshold, inputs, scaled=scaled) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -284,8 +287,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ori_image_shape, threshold=0.5, scaled=False, - warmup=0, - repeats=1): + repeats=1, + add_timer=True): ''' Args: image_path (list[str]): path of images, only support one image path @@ -294,27 +297,26 @@ class SDE_DetectorPicoDet(DetectorPicoDet): threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_path) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -328,9 +330,11 @@ class SDE_DetectorPicoDet(DetectorPicoDet): np_boxes_list.append( self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess self.picodet_postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -346,8 +350,9 @@ class SDE_DetectorPicoDet(DetectorPicoDet): else: pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -503,42 +508,43 @@ class SDE_ReID(object): def predict(self, crops, pred_dets, - warmup=0, repeats=1, + add_timer=True, MTMCT=False, frame_id=0, seq_name=''): - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(crops) - self.det_times.preprocess_time_s.end() - input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - feature_tensor = self.predictor.get_output_handle(output_names[0]) - pred_embs = feature_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() feature_tensor = self.predictor.get_output_handle(output_names[0]) pred_embs = feature_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess if MTMCT == False: tracking_outs = self.postprocess(pred_dets, pred_embs) else: tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs, frame_id, seq_name) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + 
self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return tracking_outs @@ -549,13 +555,23 @@ def predict_image(detector, reid_model, image_list): frame = cv2.imread(img_file) ori_image_shape = list(frame.shape[:2]) if FLAGS.run_benchmark: + # warmup pred_dets, pred_xyxys = detector.predict( [img_file], ori_image_shape, FLAGS.threshold, FLAGS.scaled, - warmup=10, - repeats=10) + repeats=10, + add_timer=False) + # run benchmark + pred_dets, pred_xyxys = detector.predict( + [img_file], + ori_image_shape, + FLAGS.threshold, + FLAGS.scaled, + repeats=10, + add_timer=True) + cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -574,8 +590,13 @@ def predict_image(detector, reid_model, image_list): crops = reid_model.get_crops(pred_xyxys, frame) if FLAGS.run_benchmark: + # warmup tracking_outs = reid_model.predict( - crops, pred_dets, warmup=10, repeats=10) + crops, pred_dets, repeats=10, add_timer=False) + # run benchmark + tracking_outs = reid_model.predict( + crops, pred_dets, repeats=10, add_timer=True) + else: tracking_outs = reid_model.predict(crops, pred_dets) diff --git a/deploy/python/det_keypoint_unite_infer.py b/deploy/python/det_keypoint_unite_infer.py index 5be63a72b..a695a9f0f 100644 --- a/deploy/python/det_keypoint_unite_infer.py +++ b/deploy/python/det_keypoint_unite_infer.py @@ -68,8 +68,12 @@ def predict_with_given_det(image, det_res, keypoint_detector, batch_images = rec_images[start_index:end_index] batch_records = np.array(records[start_index:end_index]) if run_benchmark: + # warmup keypoint_result = keypoint_detector.predict( - batch_images, keypoint_threshold, warmup=10, repeats=10) + batch_images, keypoint_threshold, repeats=10, add_timer=False) + # run benchmark + keypoint_result = keypoint_detector.predict( + batch_images, keypoint_threshold, repeats=10, add_timer=True) else: keypoint_result = keypoint_detector.predict(batch_images, keypoint_threshold) @@ -100,8 +104,12 @@ def topdown_unite_predict(detector, det_timer.preprocess_time_s.end() if FLAGS.run_benchmark: + # warmup + results = detector.predict( + [image], FLAGS.det_threshold, repeats=10, add_timer=False) + # run benchmark results = detector.predict( - [image], FLAGS.det_threshold, warmup=10, repeats=10) + [image], FLAGS.det_threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 77f509c24..38af04197 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -125,35 +125,33 @@ class Detector(object): results['masks'] = np_masks return results - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: shape: [N, im_h, im_w] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() np_boxes, np_masks = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = 
self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - np_boxes = boxes_tensor.copy_to_cpu() - if self.pred_config.mask: - masks_tensor = self.predictor.get_output_handle(output_names[2]) - np_masks = masks_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -164,9 +162,12 @@ class Detector(object): if self.pred_config.mask: masks_tensor = self.predictor.get_output_handle(output_names[2]) np_masks = masks_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess results = [] if reduce(lambda x, y: x * y, np_boxes.shape) < 6: print('[WARNNING] No object detected.') @@ -174,8 +175,9 @@ class Detector(object): else: results = self.postprocess( np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(image_list) + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(image_list) return results def get_timer(self): @@ -228,36 +230,30 @@ class DetectorSOLOv2(Detector): self.det_times = Timer() self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - def predict(self, image, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (str/np.ndarray): path of image/ np.ndarray read by cv2 threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w] 'cate_label': label of segm, shape:[N] 'cate_score': confidence score of segm, shape:[N] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() np_label, np_score, np_segms = None, None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - np_boxes_num = self.predictor.get_output_handle(output_names[ - 0]).copy_to_cpu() - np_label = self.predictor.get_output_handle(output_names[ - 1]).copy_to_cpu() - np_score = self.predictor.get_output_handle(output_names[ - 2]).copy_to_cpu() - np_segms = self.predictor.get_output_handle(output_names[ - 3]).copy_to_cpu() - self.det_times.inference_time_s.start() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -269,8 +265,9 @@ class DetectorSOLOv2(Detector): 2]).copy_to_cpu() np_segms = self.predictor.get_output_handle(output_names[ 3]).copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 + if 
add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 return dict( segm=np_segms, @@ -325,38 +322,32 @@ class DetectorPicoDet(Detector): self.det_times = Timer() self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - def predict(self, image, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (str/np.ndarray): path of image/ np.ndarray read by cv2 threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - np_score_list.clear() - np_boxes_list.clear() - output_names = self.predictor.get_output_names() - num_outs = int(len(output_names) / 2) - for out_idx in range(num_outs): - np_score_list.append( - self.predictor.get_output_handle(output_names[out_idx]) - .copy_to_cpu()) - np_boxes_list.append( - self.predictor.get_output_handle(output_names[ - out_idx + num_outs]).copy_to_cpu()) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model_prediction for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -370,9 +361,12 @@ class DetectorPicoDet(Detector): np_boxes_list.append( self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + + # postprocess self.postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -380,7 +374,8 @@ class DetectorPicoDet(Detector): strides=self.pred_config.fpn_stride, nms_threshold=self.pred_config.nms['nms_threshold']) np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list) - self.det_times.postprocess_time_s.end() + if add_timer: + self.det_times.postprocess_time_s.end() return dict(boxes=np_boxes, boxes_num=np_boxes_num) @@ -647,8 +642,13 @@ def predict_image(detector, image_list, batch_size=1): end_index = min((i + 1) * batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] if FLAGS.run_benchmark: + # warmup detector.predict( - batch_image_list, FLAGS.threshold, warmup=10, repeats=10) + batch_image_list, FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + batch_image_list, FLAGS.threshold, repeats=10, add_timer=True) + cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py index 6594bdbfd..3f663c81f 100644 --- a/deploy/python/keypoint_infer.py +++ b/deploy/python/keypoint_infer.py @@ -145,41 +145,33 @@ class KeyPoint_Detector(Detector): 
raise ValueError("Unsupported arch: {}, expect {}".format( self.pred_config.arch, KEYPOINT_SUPPORT_MODELS)) - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: shape: [N, im_h, im_w] ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) np_boxes, np_masks = None, None input_names = self.predictor.get_input_names() - for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - self.det_times.preprocess_time_s.end() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - np_boxes = boxes_tensor.copy_to_cpu() - if self.pred_config.tagmap: - masks_tensor = self.predictor.get_output_handle(output_names[1]) - heat_k = self.predictor.get_output_handle(output_names[2]) - inds_k = self.predictor.get_output_handle(output_names[3]) - np_masks = [ - masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), - inds_k.copy_to_cpu() - ] + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -193,13 +185,16 @@ class KeyPoint_Detector(Detector): masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), inds_k.copy_to_cpu() ] - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess results = self.postprocess( np_boxes, np_masks, inputs, threshold=threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(image_list) + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(image_list) return results @@ -266,7 +261,12 @@ class PredictConfig_KeyPoint(): def predict_image(detector, image_list): for i, img_file in enumerate(image_list): if FLAGS.run_benchmark: - detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + [img_file], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -300,7 +300,7 @@ def predict_video(detector, camera_id): if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name + '.mp4') - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) index = 1 while (1): diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index c7006a7cd..f646d911c 100644 --- a/deploy/python/mot_jde_infer.py +++ 
b/deploy/python/mot_jde_infer.py @@ -120,31 +120,31 @@ class JDE_Detector(Detector): online_scores[cls_id].append(tscore) return online_tlwhs, online_scores, online_ids - def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True): ''' Args: image_list (list): list of image threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: online_tlwhs, online_scores, online_ids (dict[np.array]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image_list) - self.det_times.preprocess_time_s.end() pred_dets, pred_embs = None, None input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - pred_dets = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() @@ -152,13 +152,17 @@ class JDE_Detector(Detector): pred_dets = boxes_tensor.copy_to_cpu() embs_tensor = self.predictor.get_output_handle(output_names[1]) pred_embs = embs_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess( pred_dets, pred_embs, threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -172,7 +176,12 @@ def predict_image(detector, image_list): for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10) + # warmup + detector.predict( + [frame], FLAGS.threshold, repeats=10, add_timer=False) + # run benchmark + detector.predict( + [frame], FLAGS.threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -181,9 +190,14 @@ def predict_image(detector, image_list): else: online_tlwhs, online_scores, online_ids = detector.predict( [frame], FLAGS.threshold) - online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, - online_ids, online_scores, frame_id, - ids2names=ids2names) + online_im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id, + ids2names=ids2names) if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) @@ -211,7 +225,7 @@ def predict_video(detector, camera_id): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer = 
MOTTimer() diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py index f3fda1e6a..70f03db76 100644 --- a/deploy/python/mot_keypoint_unite_infer.py +++ b/deploy/python/mot_keypoint_unite_infer.py @@ -64,8 +64,12 @@ def mot_keypoint_unite_predict_image(mot_model, frame = cv2.imread(img_file) if FLAGS.run_benchmark: + # warmup online_tlwhs, online_scores, online_ids = mot_model.predict( - [frame], FLAGS.mot_threshold, warmup=10, repeats=10) + [frame], FLAGS.mot_threshold, repeats=10, add_timer=False) + # run benchmark + online_tlwhs, online_scores, online_ids = mot_model.predict( + [frame], FLAGS.mot_threshold, repeats=10, add_timer=True) cm, gm, gu = get_current_memory_mb() mot_model.cpu_mem += cm mot_model.gpu_mem += gm @@ -84,13 +88,16 @@ def mot_keypoint_unite_predict_image(mot_model, FLAGS.run_benchmark) else: - warmup = 10 if FLAGS.run_benchmark else 0 + if FLAGS.run_benchmark: + keypoint_results = keypoint_model.predict( + [frame], + FLAGS.keypoint_threshold, + repeats=10, + add_timer=False) + repeats = 10 if FLAGS.run_benchmark else 1 keypoint_results = keypoint_model.predict( - [frame], - FLAGS.keypoint_threshold, - warmup=warmup, - repeats=repeats) + [frame], FLAGS.keypoint_threshold, repeats=repeats) if FLAGS.run_benchmark: cm, gm, gu = get_current_memory_mb() @@ -103,7 +110,7 @@ def mot_keypoint_unite_predict_image(mot_model, keypoint_results, visual_thread=FLAGS.keypoint_threshold, returnimg=True, - ids=online_ids + ids=online_ids[0] if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else None) @@ -144,7 +151,7 @@ def mot_keypoint_unite_predict_video(mot_model, os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer_mot = FPSTimer() @@ -193,7 +200,7 @@ def mot_keypoint_unite_predict_video(mot_model, keypoint_results, visual_thread=FLAGS.keypoint_threshold, returnimg=True, - ids=online_ids + ids=online_ids[0] if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else None) diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index a6af02065..16b9a85f0 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -178,40 +178,43 @@ class SDE_Detector(Detector): return pred_dets, pred_xyxys - def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (np.ndarray): image numpy data - threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction Returns: pred_dets (np.ndarray, [N, 6]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes = boxes_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() + + # postprocess if len(boxes) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) @@ -223,8 +226,9 @@ class SDE_Detector(Detector): pred_dets, pred_xyxys = self.postprocess( boxes, input_shape, im_shape, scale_factor, threshold, scaled) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return pred_dets, pred_xyxys @@ -271,7 +275,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): assert batch_size == 1, "The JDE Detector only supports batch size=1 now" self.pred_config = pred_config - def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, threshold): + def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, + threshold): over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] if len(over_thres_idx) == 0: pred_dets = np.zeros((1, 6), dtype=np.float32) @@ -299,33 +304,35 @@ class SDE_DetectorPicoDet(DetectorPicoDet): (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) return pred_dets, pred_xyxys - def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True): ''' Args: image (np.ndarray): image numpy data - threshold (float): threshold of predicted box' score scaled (bool): whether the coords after detector outputs are scaled, default False in jde yolov3, set True in general detector. 
+ threshold (float): threshold of predicted box' score + repeats (int): repeat number for prediction + add_timer (bool): whether add timer during prediction + Returns: pred_dets (np.ndarray, [N, 6]) ''' - self.det_times.preprocess_time_s.start() + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - self.det_times.inference_time_s.start() + # model prediction + np_score_list, np_boxes_list = [], [] for i in range(repeats): self.predictor.run() np_score_list.clear() @@ -340,9 +347,12 @@ class SDE_DetectorPicoDet(DetectorPicoDet): self.predictor.get_output_handle(output_names[ out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 - self.det_times.postprocess_time_s.start() + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + + # postprocess self.postprocess = PicoDetPostProcess( inputs['image'].shape[2:], inputs['im_shape'], @@ -360,9 +370,10 @@ class SDE_DetectorPicoDet(DetectorPicoDet): scale_factor = inputs['scale_factor'] pred_dets, pred_xyxys = self.postprocess_bboxes( boxes, input_shape, im_shape, scale_factor, threshold) - + if add_timer: + self.det_times.postprocess_time_s.end() return pred_dets, pred_xyxys - + class SDE_ReID(object): def __init__(self, @@ -445,35 +456,36 @@ class SDE_ReID(object): return online_tlwhs, online_scores, online_ids - def predict(self, crops, pred_dets, warmup=0, repeats=1): - self.det_times.preprocess_time_s.start() + def predict(self, crops, pred_dets, repeats=1, add_timer=True): + # preprocess + if add_timer: + self.det_times.preprocess_time_s.start() inputs = self.preprocess(crops) - self.det_times.preprocess_time_s.end() input_names = self.predictor.get_input_names() for i in range(len(input_names)): input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) + if add_timer: + self.det_times.preprocess_time_s.end() + self.det_times.inference_time_s.start() - for i in range(warmup): - self.predictor.run() - output_names = self.predictor.get_output_names() - feature_tensor = self.predictor.get_output_handle(output_names[0]) - pred_embs = feature_tensor.copy_to_cpu() - - self.det_times.inference_time_s.start() + # model prediction for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() feature_tensor = self.predictor.get_output_handle(output_names[0]) pred_embs = feature_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) + if add_timer: + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.postprocess_time_s.start() - self.det_times.postprocess_time_s.start() + # postprocess online_tlwhs, online_scores, online_ids = self.postprocess(pred_dets, pred_embs) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 + if add_timer: + 
self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids @@ -483,8 +495,20 @@ def predict_image(detector, reid_model, image_list): for i, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: + # warmup pred_dets, pred_xyxys = detector.predict( - [frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10) + [frame], + FLAGS.scaled, + FLAGS.threshold, + repeats=10, + add_timer=False) + # run benchmark + pred_dets, pred_xyxys = detector.predict( + [frame], + FLAGS.scaled, + FLAGS.threshold, + repeats=10, + add_timer=True) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -503,8 +527,12 @@ def predict_image(detector, reid_model, image_list): crops = reid_model.get_crops(pred_xyxys, frame) if FLAGS.run_benchmark: + # warmup + online_tlwhs, online_scores, online_ids = reid_model.predict( + crops, pred_dets, repeats=10, add_timer=False) + # run benchmark online_tlwhs, online_scores, online_ids = reid_model.predict( - crops, pred_dets, warmup=10, repeats=10) + crops, pred_dets, repeats=10, add_timer=True) else: online_tlwhs, online_scores, online_ids = reid_model.predict( crops, pred_dets) @@ -538,7 +566,7 @@ def predict_video(detector, reid_model, camera_id): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) if not FLAGS.save_images: - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer = MOTTimer() -- GitLab
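
Note on the calling convention this patch introduces: every predict() above drops the old warmup argument and instead gates its timer bookkeeping on a new add_timer flag, so benchmark callers first warm up with add_timer=False and then run the timed pass with add_timer=True. The snippet below is a minimal, self-contained sketch of that pattern, not the PaddleDetection code itself; the Times, Timer and Detector classes here are simplified stand-ins for the real deploy utilities, and only the control flow mirrors the patch.

# Minimal, self-contained sketch of the add_timer convention used above.
# Times, Timer and Detector are hypothetical stand-ins, NOT the real
# PaddleDetection deploy classes.
import time


class Times(object):
    """Accumulates elapsed seconds for one stage (preprocess/inference/postprocess)."""

    def __init__(self):
        self.total = 0.0
        self._start = None

    def start(self):
        self._start = time.time()

    def end(self, repeats=1):
        # Average over `repeats` so a repeated inference loop counts as one pass.
        self.total += (time.time() - self._start) / repeats


class Timer(object):
    def __init__(self):
        self.preprocess_time_s = Times()
        self.inference_time_s = Times()
        self.postprocess_time_s = Times()
        self.img_num = 0


class Detector(object):
    def __init__(self):
        self.det_times = Timer()

    def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
        # preprocess
        if add_timer:
            self.det_times.preprocess_time_s.start()
        inputs = list(image_list)  # placeholder for real preprocessing
        if add_timer:
            self.det_times.preprocess_time_s.end()
            self.det_times.inference_time_s.start()

        # model prediction
        for _ in range(repeats):
            time.sleep(0.01)  # placeholder for self.predictor.run()
        if add_timer:
            self.det_times.inference_time_s.end(repeats=repeats)
            self.det_times.postprocess_time_s.start()

        # postprocess
        results = {'boxes': []}  # placeholder for real postprocessing
        if add_timer:
            self.det_times.postprocess_time_s.end()
            self.det_times.img_num += len(image_list)
        return results


if __name__ == '__main__':
    detector = Detector()
    # warmup: excluded from det_times
    detector.predict(['demo.jpg'], repeats=10, add_timer=False)
    # run benchmark: included in det_times
    detector.predict(['demo.jpg'], repeats=10, add_timer=True)
    print('images timed:', detector.det_times.img_num)
    print('avg inference time: %.4f s' % detector.det_times.inference_time_s.total)

Gating every timer call on a single flag keeps warmup iterations out of the averaged stage times without duplicating the inference loop, which is what the removed warmup loops used to do.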