From 7441fba7580a5c017cd30d8187498c48db356ed2 Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Tue, 9 Nov 2021 15:56:00 +0800 Subject: [PATCH] [MOT] fix picodet deepsort deploy, add cls_name visualization (#4513) --- deploy/python/mot_jde_infer.py | 11 +- deploy/python/mot_sde_infer.py | 180 +++++++++++++++++++++++++--- ppdet/modeling/mot/visualization.py | 25 ++-- 3 files changed, 186 insertions(+), 30 deletions(-) diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index 22ab467dc..1070c0f41 100644 --- a/deploy/python/mot_jde_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -23,7 +23,6 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from preprocess import preprocess from utils import argsparser, Timer, get_current_memory_mb from infer import Detector, get_test_images, print_arguments, PredictConfig from benchmark_utils import PaddleInferBenchmark @@ -167,6 +166,8 @@ def predict_image(detector, image_list): results = [] num_classes = detector.num_classes data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + image_list.sort() for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) @@ -181,7 +182,8 @@ def predict_image(detector, image_list): online_tlwhs, online_scores, online_ids = detector.predict( [frame], FLAGS.threshold) online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, - online_ids, online_scores, frame_id) + online_ids, online_scores, frame_id, + ids2names) if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) @@ -216,6 +218,8 @@ def predict_video(detector, camera_id): results = defaultdict(list) # support single class and multi classes num_classes = detector.num_classes data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + while (1): ret, frame = capture.read() if not ret: @@ -237,7 +241,8 @@ def predict_video(detector, camera_id): online_ids, online_scores, frame_id=frame_id, - fps=fps) + fps=fps, + ids2names=ids2names) if FLAGS.save_images: save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) if not os.path.exists(save_dir): diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index 5d00c4f26..a6af02065 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -23,9 +23,9 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from preprocess import preprocess +from picodet_postprocess import PicoDetPostProcess from utils import argsparser, Timer, get_current_memory_mb -from infer import Detector, get_test_images, print_arguments, PredictConfig +from infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig from infer import load_predictor from benchmark_utils import PaddleInferBenchmark @@ -139,6 +139,7 @@ class SDE_Detector(Detector): cpu_threads=cpu_threads, enable_mkldnn=enable_mkldnn) assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + self.pred_config = pred_config def postprocess(self, boxes, input_shape, im_shape, scale_factor, threshold, scaled): @@ -147,6 +148,8 @@ class SDE_Detector(Detector): pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) return pred_dets, pred_xyxys + else: + boxes = boxes[over_thres_idx] if not scaled: # scaled means whether the coords after detector outputs @@ -159,6 +162,11 @@ class SDE_Detector(Detector): 
         pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape,
                                         scale_factor)
+        if len(keep_idx[0]) == 0:
+            pred_dets = np.zeros((1, 6), dtype=np.float32)
+            pred_xyxys = np.zeros((1, 4), dtype=np.float32)
+            return pred_dets, pred_xyxys
+
         pred_scores = boxes[:, 1:2][keep_idx[0]]
         pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
         pred_tlwhs = np.concatenate(
@@ -168,7 +176,7 @@ class SDE_Detector(Detector):
 
         pred_dets = np.concatenate(
             (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
-        return pred_dets[over_thres_idx], pred_xyxys[over_thres_idx]
+        return pred_dets, pred_xyxys
 
     def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1):
         '''
@@ -220,6 +228,142 @@ class SDE_Detector(Detector):
         return pred_dets, pred_xyxys
 
 
+class SDE_DetectorPicoDet(DetectorPicoDet):
+    """
+    Args:
+        pred_config (object): config of model, defined by `Config(model_dir)`
+        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
+        device (str): device to run on, one of CPU/GPU/XPU, default is CPU
+        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
+        trt_min_shape (int): min shape for dynamic shape in trt
+        trt_max_shape (int): max shape for dynamic shape in trt
+        trt_opt_shape (int): opt shape for dynamic shape in trt
+        trt_calib_mode (bool): If the model is produced by TRT offline quantization
+            calibration, trt_calib_mode needs to be set to True
+        cpu_threads (int): number of CPU threads
+        enable_mkldnn (bool): whether to enable MKLDNN
+    """
+
+    def __init__(self,
+                 pred_config,
+                 model_dir,
+                 device='CPU',
+                 run_mode='fluid',
+                 batch_size=1,
+                 trt_min_shape=1,
+                 trt_max_shape=1088,
+                 trt_opt_shape=608,
+                 trt_calib_mode=False,
+                 cpu_threads=1,
+                 enable_mkldnn=False):
+        super(SDE_DetectorPicoDet, self).__init__(
+            pred_config=pred_config,
+            model_dir=model_dir,
+            device=device,
+            run_mode=run_mode,
+            batch_size=batch_size,
+            trt_min_shape=trt_min_shape,
+            trt_max_shape=trt_max_shape,
+            trt_opt_shape=trt_opt_shape,
+            trt_calib_mode=trt_calib_mode,
+            cpu_threads=cpu_threads,
+            enable_mkldnn=enable_mkldnn)
+        assert batch_size == 1, "The SDE_DetectorPicoDet only supports batch size=1 now"
+        self.pred_config = pred_config
+
+    def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, threshold):
+        over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
+        if len(over_thres_idx) == 0:
+            pred_dets = np.zeros((1, 6), dtype=np.float32)
+            pred_xyxys = np.zeros((1, 4), dtype=np.float32)
+            return pred_dets, pred_xyxys
+        else:
+            boxes = boxes[over_thres_idx]
+
+        pred_bboxes = boxes[:, 2:]
+
+        pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape,
+                                        scale_factor)
+        if len(keep_idx[0]) == 0:
+            pred_dets = np.zeros((1, 6), dtype=np.float32)
+            pred_xyxys = np.zeros((1, 4), dtype=np.float32)
+            return pred_dets, pred_xyxys
+
+        pred_scores = boxes[:, 1:2][keep_idx[0]]
+        pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
+        pred_tlwhs = np.concatenate(
+            (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
+            axis=1)
+
+        pred_dets = np.concatenate(
+            (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
+        return pred_dets, pred_xyxys
+
+    def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1):
+        '''
+        Args:
+            image (np.ndarray): image numpy data
+            threshold (float): threshold of predicted box score
+            scaled (bool): whether the coords output by the detector are already scaled,
+                default False in JDE YOLOv3, set True in general detectors.
+        Returns:
+            pred_dets (np.ndarray, [N, 6]), pred_xyxys (np.ndarray, [N, 4])
+        '''
+        self.det_times.preprocess_time_s.start()
+        inputs = self.preprocess(image)
+        self.det_times.preprocess_time_s.end()
+
+        input_names = self.predictor.get_input_names()
+        for i in range(len(input_names)):
+            input_tensor = self.predictor.get_input_handle(input_names[i])
+            input_tensor.copy_from_cpu(inputs[input_names[i]])
+
+        np_score_list, np_boxes_list = [], []
+        for i in range(warmup):
+            self.predictor.run()
+            output_names = self.predictor.get_output_names()
+            boxes_tensor = self.predictor.get_output_handle(output_names[0])
+            boxes = boxes_tensor.copy_to_cpu()
+
+        self.det_times.inference_time_s.start()
+        for i in range(repeats):
+            self.predictor.run()
+            np_score_list.clear()
+            np_boxes_list.clear()
+            output_names = self.predictor.get_output_names()
+            num_outs = int(len(output_names) / 2)
+            for out_idx in range(num_outs):
+                np_score_list.append(
+                    self.predictor.get_output_handle(output_names[out_idx])
+                    .copy_to_cpu())
+                np_boxes_list.append(
+                    self.predictor.get_output_handle(output_names[
+                        out_idx + num_outs]).copy_to_cpu())
+
+        self.det_times.inference_time_s.end(repeats=repeats)
+        self.det_times.img_num += 1
+        self.det_times.postprocess_time_s.start()
+        self.postprocess = PicoDetPostProcess(
+            inputs['image'].shape[2:],
+            inputs['im_shape'],
+            inputs['scale_factor'],
+            strides=self.pred_config.fpn_stride,
+            nms_threshold=self.pred_config.nms['nms_threshold'])
+        boxes, boxes_num = self.postprocess(np_score_list, np_boxes_list)
+
+        if len(boxes) == 0:
+            pred_dets = np.zeros((1, 6), dtype=np.float32)
+            pred_xyxys = np.zeros((1, 4), dtype=np.float32)
+        else:
+            input_shape = inputs['image'].shape[2:]
+            im_shape = inputs['im_shape']
+            scale_factor = inputs['scale_factor']
+            pred_dets, pred_xyxys = self.postprocess_bboxes(
+                boxes, input_shape, im_shape, scale_factor, threshold)
+
+        return pred_dets, pred_xyxys
+
+
 class SDE_ReID(object):
     def __init__(self,
                  pred_config,
@@ -350,7 +494,7 @@ def predict_image(detector, reid_model, image_list):
         pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
                                                  FLAGS.threshold)
 
-        if len(pred_dets) == 1 and sum(pred_dets) == 0:
+        if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
             print('Frame {} has no object, try to modify score threshold.'.
                   format(i))
             online_im = frame
@@ -407,7 +551,7 @@ def predict_video(detector, reid_model, camera_id):
         pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
                                                  FLAGS.threshold)
 
-        if len(pred_dets) == 1 and sum(pred_dets) == 0:
+        if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
             print('Frame {} has no object, try to modify score threshold.'.
                  format(frame_id))
            timer.toc()
@@ -464,17 +608,21 @@ def predict_video(detector, reid_model, camera_id):
 
 def main():
     pred_config = PredictConfig(FLAGS.model_dir)
-    detector = SDE_Detector(
-        pred_config,
-        FLAGS.model_dir,
-        device=FLAGS.device,
-        run_mode=FLAGS.run_mode,
-        trt_min_shape=FLAGS.trt_min_shape,
-        trt_max_shape=FLAGS.trt_max_shape,
-        trt_opt_shape=FLAGS.trt_opt_shape,
-        trt_calib_mode=FLAGS.trt_calib_mode,
-        cpu_threads=FLAGS.cpu_threads,
-        enable_mkldnn=FLAGS.enable_mkldnn)
+    detector_func = 'SDE_Detector'
+    if pred_config.arch == 'PicoDet':
+        detector_func = 'SDE_DetectorPicoDet'
+
+    detector = eval(detector_func)(pred_config,
+                                   FLAGS.model_dir,
+                                   device=FLAGS.device,
+                                   run_mode=FLAGS.run_mode,
+                                   batch_size=FLAGS.batch_size,
+                                   trt_min_shape=FLAGS.trt_min_shape,
+                                   trt_max_shape=FLAGS.trt_max_shape,
+                                   trt_opt_shape=FLAGS.trt_opt_shape,
+                                   trt_calib_mode=FLAGS.trt_calib_mode,
+                                   cpu_threads=FLAGS.cpu_threads,
+                                   enable_mkldnn=FLAGS.enable_mkldnn)
 
     pred_config = PredictConfig(FLAGS.reid_model_dir)
     reid_model = SDE_ReID(
diff --git a/ppdet/modeling/mot/visualization.py b/ppdet/modeling/mot/visualization.py
index 36cd3ba6d..de0e63965 100644
--- a/ppdet/modeling/mot/visualization.py
+++ b/ppdet/modeling/mot/visualization.py
@@ -28,7 +28,7 @@ def plot_tracking(image,
                   scores=None,
                   frame_id=0,
                   fps=0.,
-                  ids2=None):
+                  ids2names=[]):
     im = np.ascontiguousarray(np.copy(image))
     im_h, im_w = im.shape[:2]
 
@@ -52,15 +52,16 @@ def plot_tracking(image,
         intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
         obj_id = int(obj_ids[i])
         id_text = '{}'.format(int(obj_id))
-        if ids2 is not None:
-            id_text = id_text + ', {}'.format(int(ids2[i]))
+        if ids2names != []:
+            assert len(ids2names) == 1, "plot_tracking only supports a single class."
+            id_text = '{}_'.format(ids2names[0]) + id_text
         _line_thickness = 1 if obj_id <= 0 else line_thickness
         color = get_color(abs(obj_id))
         cv2.rectangle(
             im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
         cv2.putText(
             im,
-            id_text, (intbox[0], intbox[1] + 10),
+            id_text, (intbox[0], intbox[1] - 10),
             cv2.FONT_HERSHEY_PLAIN,
             text_scale, (0, 0, 255),
             thickness=text_thickness)
@@ -69,7 +70,7 @@ def plot_tracking(image,
             text = '{:.2f}'.format(float(scores[i]))
             cv2.putText(
                 im,
-                text, (intbox[0], intbox[1] - 10),
+                text, (intbox[0], intbox[1] + 10),
                 cv2.FONT_HERSHEY_PLAIN,
                 text_scale, (0, 255, 255),
                 thickness=text_thickness)
@@ -83,7 +84,7 @@ def plot_tracking_dict(image,
                        scores_dict,
                        frame_id=0,
                        fps=0.,
-                       ids2=None):
+                       ids2names=[]):
     im = np.ascontiguousarray(np.copy(image))
     im_h, im_w = im.shape[:2]
 
@@ -111,10 +112,12 @@ def plot_tracking_dict(image,
             x1, y1, w, h = tlwh
             intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
             obj_id = int(obj_ids[i])
-            if num_classes == 1:
-                id_text = '{}'.format(int(obj_id))
+
+            id_text = '{}'.format(int(obj_id))
+            if ids2names != []:
+                id_text = '{}_{}'.format(ids2names[cls_id], id_text)
             else:
-                id_text = 'class{}_id{}'.format(cls_id, int(obj_id))
+                id_text = 'class{}_{}'.format(cls_id, id_text)
 
             _line_thickness = 1 if obj_id <= 0 else line_thickness
             color = get_color(abs(obj_id))
@@ -126,7 +129,7 @@
                 thickness=line_thickness)
             cv2.putText(
                 im,
-                id_text, (intbox[0], intbox[1] + 10),
+                id_text, (intbox[0], intbox[1] - 10),
                 cv2.FONT_HERSHEY_PLAIN,
                 text_scale, (0, 0, 255),
                 thickness=text_thickness)
@@ -135,7 +138,7 @@
                 text = '{:.2f}'.format(float(scores[i]))
                 cv2.putText(
                     im,
-                    text, (intbox[0], intbox[1] - 10),
+                    text, (intbox[0], intbox[1] + 10),
                     cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255), thickness=text_thickness) -- GitLab
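
Usage note: main() now chooses the detector class from pred_config.arch, so
PicoDet export models go through SDE_DetectorPicoDet while other archs keep
SDE_Detector. Below is a minimal sketch of the same dispatch, assuming
PredictConfig exposes an `arch` attribute as in deploy/python/infer.py; the
model path is hypothetical. An explicit class lookup like this also avoids
eval():

    from infer import PredictConfig
    from mot_sde_infer import SDE_Detector, SDE_DetectorPicoDet

    model_dir = 'output_inference/picodet_l_640_coco'  # hypothetical path
    pred_config = PredictConfig(model_dir)
    # PicoDet needs its own multi-head postprocess; every other arch can use
    # the generic SDE detector.
    detector_cls = (SDE_DetectorPicoDet
                    if pred_config.arch == 'PicoDet' else SDE_Detector)
    detector = detector_cls(pred_config, model_dir, device='GPU')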
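
Visualization note: when ids2names is passed through from
detector.pred_config.labels, plot_tracking_dict labels each box as
"<cls_name>_<track_id>", and otherwise falls back to
"class<cls_id>_<track_id>". A standalone sketch of that labeling rule
(make_id_text is a hypothetical helper, not part of this patch):

    def make_id_text(obj_id, cls_id, ids2names=()):
        # Mirrors the id_text construction in plot_tracking_dict.
        id_text = '{}'.format(int(obj_id))
        if ids2names:
            return '{}_{}'.format(ids2names[cls_id], id_text)
        return 'class{}_{}'.format(cls_id, id_text)

    print(make_id_text(3, 0, ['pedestrian']))  # pedestrian_3
    print(make_id_text(3, 0))                  # class0_3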
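
Data-layout note: postprocess_bboxes returns pred_dets as an [N, 6] array in
[x, y, w, h, score, cls_id] order, converted from the detector's
[cls_id, score, x1, y1, x2, y2] rows. A small numpy check of that conversion,
with made-up box values for illustration:

    import numpy as np

    # One detector box: cls_id=0, score=0.9, xyxy=(10, 20, 50, 80).
    boxes = np.array([[0., 0.9, 10., 20., 50., 80.]], dtype=np.float32)
    xyxy = boxes[:, 2:]
    # tlwh with the same +1 width/height convention as the patch.
    tlwh = np.concatenate(
        (xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1), axis=1)
    pred_dets = np.concatenate((tlwh, boxes[:, 1:2], boxes[:, 0:1]), axis=1)
    print(pred_dets)  # [[10. 20. 41. 61.  0.9  0. ]]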