diff --git a/configs/mot/jde/_base_/jde_darknet53.yml b/configs/mot/jde/_base_/jde_darknet53.yml index 73faa52f662e7db24ef40c25c029561225d1a3b8..f5370fc6affa10f33af04185c48d61d5a2f06d98 100644 --- a/configs/mot/jde/_base_/jde_darknet53.yml +++ b/configs/mot/jde/_base_/jde_darknet53.yml @@ -53,4 +53,4 @@ JDETracker: det_thresh: 0.3 track_buffer: 30 min_box_area: 200 - motion: KalmanFilter + vertical_ratio: 1.6 # for pedestrian diff --git a/deploy/pphuman/config/tracker_config.yml b/deploy/pphuman/config/tracker_config.yml index 5182da93e3f19bd49421e09d1ec01be0c0f11643..ddd55e8653870ed9bdfe9734995e8af5b56f49e2 100644 --- a/deploy/pphuman/config/tracker_config.yml +++ b/deploy/pphuman/config/tracker_config.yml @@ -11,8 +11,8 @@ JDETracker: conf_thres: 0.6 low_conf_thres: 0.1 match_thres: 0.9 - min_box_area: 100 - vertical_ratio: 1.6 # for pedestrian + min_box_area: 0 + vertical_ratio: 0 # 1.6 for pedestrian DeepSORTTracker: input_size: [64, 192] diff --git a/deploy/pptracking/python/mot/tracker/jde_tracker.py b/deploy/pptracking/python/mot/tracker/jde_tracker.py index 8549801febb198fadece87ed043bf106436134ae..df6783259040b8b0f5648f16a4ece5835746d195 100644 --- a/deploy/pptracking/python/mot/tracker/jde_tracker.py +++ b/deploy/pptracking/python/mot/tracker/jde_tracker.py @@ -38,7 +38,7 @@ class JDETracker(object): track_buffer (int): buffer for tracker min_box_area (int): min box area to filter out low quality boxes vertical_ratio (float): w/h, the vertical ratio of the bbox to filter - bad results. If set <0 means no need to filter bboxes,usually set + bad results. If set <= 0 means no need to filter bboxes,usually set 1.6 for pedestrian tracking. tracked_thresh (float): linear assignment threshold of tracked stracks and detections @@ -64,8 +64,8 @@ class JDETracker(object): num_classes=1, det_thresh=0.3, track_buffer=30, - min_box_area=200, - vertical_ratio=1.6, + min_box_area=0, + vertical_ratio=0, tracked_thresh=0.7, r_tracked_thresh=0.5, unconfirmed_thresh=0.7, @@ -161,9 +161,8 @@ class JDETracker(object): detections = [ STrack( STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id, - 30, temp_feat) - for (tlbrs, temp_feat - ) in zip(pred_dets_cls, pred_embs_cls) + 30, temp_feat) for (tlbrs, temp_feat) in + zip(pred_dets_cls, pred_embs_cls) ] else: detections = [] @@ -238,15 +237,13 @@ class JDETracker(object): for tlbrs in pred_dets_cls_second ] else: - pred_embs_cls_second = pred_embs_dict[cls_id][inds_second] + pred_embs_cls_second = pred_embs_dict[cls_id][ + inds_second] detections_second = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[2:6]), - tlbrs[1], - cls_id, - 30, - temp_feat) - for (tlbrs, temp_feat) in zip(pred_dets_cls_second, pred_embs_cls_second) + STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], + cls_id, 30, temp_feat) for (tlbrs, temp_feat) in + zip(pred_dets_cls_second, pred_embs_cls_second) ] else: detections_second = [] diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py index afabf5f4b6a573cb8a97af757dc92dafb29a76b2..045418c981a6fcd34eb1c17d7d9437bd0fb50e1d 100644 --- a/deploy/pptracking/python/mot_jde_infer.py +++ b/deploy/pptracking/python/mot_jde_infer.py @@ -112,8 +112,8 @@ class JDE_Detector(Detector): # tracker config assert self.pred_config.tracker, "The exported JDE Detector model should have tracker." 
cfg = self.pred_config.tracker - min_box_area = cfg.get('min_box_area', 200) - vertical_ratio = cfg.get('vertical_ratio', 1.6) + min_box_area = cfg.get('min_box_area', 0.0) + vertical_ratio = cfg.get('vertical_ratio', 0.0) conf_thres = cfg.get('conf_thres', 0.0) tracked_thresh = cfg.get('tracked_thresh', 0.7) metric_type = cfg.get('metric_type', 'euclidean') @@ -164,7 +164,7 @@ class JDE_Detector(Detector): repeats (int): repeats number for prediction Returns: result (dict): include 'pred_dets': np.ndarray: shape:[N,6], N: number of box, - matix element:[x_min, y_min, x_max, y_max, score, class] + matix element:[class, score, x_min, y_min, x_max, y_max] FairMOT(JDE)'s result include 'pred_embs': np.ndarray: shape: [N, 128] ''' diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py index 62907ba240a34facc1264ebd3b1092c66dcdef99..da1b3caed2e534d4e91dd9e18c032b30baa70d19 100644 --- a/deploy/pptracking/python/mot_sde_infer.py +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -165,8 +165,8 @@ class SDE_Detector(Detector): # use ByteTracker use_byte = cfg.get('use_byte', False) det_thresh = cfg.get('det_thresh', 0.3) - min_box_area = cfg.get('min_box_area', 200) - vertical_ratio = cfg.get('vertical_ratio', 1.6) + min_box_area = cfg.get('min_box_area', 0) + vertical_ratio = cfg.get('vertical_ratio', 0) match_thres = cfg.get('match_thres', 0.9) conf_thres = cfg.get('conf_thres', 0.6) low_conf_thres = cfg.get('low_conf_thres', 0.1) @@ -194,7 +194,7 @@ class SDE_Detector(Detector): return result def reidprocess(self, det_results, repeats=1): - pred_dets = det_results['boxes'] + pred_dets = det_results['boxes'] # cls_id, score, x0, y0, x1, y1 pred_xyxys = pred_dets[:, 2:6] ori_image = det_results['ori_image'] @@ -234,7 +234,7 @@ class SDE_Detector(Detector): return det_results def tracking(self, det_results): - pred_dets = det_results['boxes'] + pred_dets = det_results['boxes'] # cls_id, score, x0, y0, x1, y1 pred_embs = det_results.get('embeddings', None) if self.use_deepsort_tracker: diff --git a/deploy/pptracking/python/tracker_config.yml b/deploy/pptracking/python/tracker_config.yml index 5182da93e3f19bd49421e09d1ec01be0c0f11643..ddd55e8653870ed9bdfe9734995e8af5b56f49e2 100644 --- a/deploy/pptracking/python/tracker_config.yml +++ b/deploy/pptracking/python/tracker_config.yml @@ -11,8 +11,8 @@ JDETracker: conf_thres: 0.6 low_conf_thres: 0.1 match_thres: 0.9 - min_box_area: 100 - vertical_ratio: 1.6 # for pedestrian + min_box_area: 0 + vertical_ratio: 0 # 1.6 for pedestrian DeepSORTTracker: input_size: [64, 192] diff --git a/deploy/python/README.md b/deploy/python/README.md index 8b672c84da4467ba2802e5e1b39aecba6779516d..4b28f0cf83da7d6195aad574ccdd5a3560d614eb 100644 --- a/deploy/python/README.md +++ b/deploy/python/README.md @@ -3,27 +3,76 @@ 在PaddlePaddle中预测引擎和训练引擎底层有着不同的优化方法, 预测引擎使用了AnalysisPredictor,专门针对推理进行了优化,是基于[C++预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html)的Python接口,该引擎可以对模型进行多项图优化,减少不必要的内存拷贝。如果用户在部署已训练模型的过程中对性能有较高的要求,我们提供了独立于PaddleDetection的预测脚本,方便用户直接集成部署。 -主要包含两个步骤: - +Python端预测部署主要包含两个步骤: - 导出预测模型 - 基于Python进行预测 ## 1. 
导出预测模型 -PaddleDetection在训练过程包括网络的前向和优化器相关参数,而在部署过程中,我们只需要前向参数,具体参考:[导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/EXPORT_MODEL.md) +PaddleDetection在训练过程包括网络的前向和优化器相关参数,而在部署过程中,我们只需要前向参数,具体参考:[导出模型](../deploy/EXPORT_MODEL.md),例如 + +```bash +# 导出YOLOv3检测模型 +python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --output_dir=./inference_model \ + -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams + +# 导出HigherHRNet(bottom-up)关键点检测模型 +python tools/export_model.py -c configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml -o weights=https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_512.pdparams + +# 导出HRNet(top-down)关键点检测模型 +python tools/export_model.py -c configs/keypoint/hrnet/hrnet_w32_384x288.yml -o weights=https://paddledet.bj.bcebos.com/models/keypoint/hrnet_w32_384x288.pdparams + +# 导出FairMOT多目标跟踪模型 +python tools/export_model.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams + +# 导出ByteTrack多目标跟踪模型(相当于只导出检测器) +python tools/export_model.py -c configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/ppyoloe_crn_l_36e_640x640_mot17half.pdparams +``` 导出后目录下,包括`infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, `model.pdmodel`四个文件。 -## 2. 基于Python的预测 +## 2. 基于Python的预测 +### 2.1 通用检测 +在终端输入以下命令进行预测: +```bash +python deploy/python/infer.py --model_dir=./output_inference/yolov3_darknet53_270e_coco --image_file=./demo/000000014439.jpg --device=GPU +``` +### 2.2 关键点检测 在终端输入以下命令进行预测: +```bash +# keypoint top-down(HRNet)/bottom-up(HigherHRNet)单独推理,该模式下top-down模型HRNet只支持单人截图预测 +python deploy/python/keypoint_infer.py --model_dir=output_inference/hrnet_w32_384x288/ --image_file=./demo/hrnet_demo.jpg --device=GPU --threshold=0.5 +python deploy/python/keypoint_infer.py --model_dir=output_inference/higherhrnet_hrnet_w32_512/ --image_file=./demo/000000014439_640x640.jpg --device=GPU --threshold=0.5 + +# detector 检测 + keypoint top-down模型联合部署(联合推理只支持top-down关键点模型) +python deploy/python/det_keypoint_unite_infer.py --det_model_dir=output_inference/yolov3_darknet53_270e_coco/ --keypoint_model_dir=output_inference/hrnet_w32_384x288/ --video_file={your video name}.mp4 --device=GPU +``` +**注意:** + - 关键点检测模型导出和预测具体可参照[keypoint](../../configs/keypoint/README.md),可分别在各个模型的文档中查找具体用法; + - 此目录下的关键点检测部署为基础前向功能,更多关键点检测功能可使用PP-Human项目,参照[pphuman](../pphuman/README.md); + +### 2.3 多目标跟踪 +在终端输入以下命令进行预测: ```bash -python deploy/python/infer.py --model_dir=./output_inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=GPU +# FairMOT跟踪 +python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU + +# ByteTrack跟踪 +python deploy/python/mot_sde_infer.py --model_dir=output_inference/ppyoloe_crn_l_36e_640x640_mot17half/ --tracker_config=deploy/python/tracker_config.yml --video_file={your video name}.mp4 --device=GPU --scaled=True + +# FairMOT多目标跟踪联合HRNet关键点检测(联合推理只支持top-down关键点模型) +python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inference/fairmot_dla34_30e_1088x608/ --keypoint_model_dir=output_inference/hrnet_w32_384x288/ --video_file={your video name}.mp4 --device=GPU ``` +**注意:** + - 多目标跟踪模型导出和预测具体可参照[mot]](../../configs/mot/README.md),可分别在各个模型的文档中查找具体用法; + - 
此目录下的跟踪部署为基础前向功能以及联合关键点部署,更多跟踪功能可使用PP-Human项目,参照[pphuman](../pphuman/README.md),或PP-Tracking项目(绘制轨迹、出入口流量计数),参照[pptracking](../pptracking/README.md); + + 参数说明如下: | 参数 | 是否必须|含义 | diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index 99033cabaa13164eb300c7e0e4800bffb12f462b..546942b21ccf5e44f782a500de85a4a624c166a8 100644 --- a/deploy/python/mot_jde_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -32,7 +32,7 @@ sys.path.insert(0, parent_path) from pptracking.python.mot import JDETracker from pptracking.python.mot.utils import MOTTimer, write_mot_results -from pptracking.python.visualize import plot_tracking, plot_tracking_dict +from pptracking.python.mot.visualize import plot_tracking_dict # Global dictionary MOT_JDE_SUPPORT_MODELS = { @@ -54,23 +54,30 @@ class JDE_Detector(Detector): trt_calib_mode (bool): If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True cpu_threads (int): cpu threads - enable_mkldnn (bool): whether to open MKLDNN + enable_mkldnn (bool): whether to open MKLDNN + output_dir (string): The path of output, default as 'output' + threshold (float): Score threshold of the detected bbox, default as 0.5 + save_images (bool): Whether to save visualization image results, default as False + save_mot_txts (bool): Whether to save tracking results (txt), default as False """ - def __init__(self, - model_dir, - tracker_config=None, - device='CPU', - run_mode='paddle', - batch_size=1, - trt_min_shape=1, - trt_max_shape=1088, - trt_opt_shape=608, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False, - output_dir='output', - threshold=0.5): + def __init__( + self, + model_dir, + tracker_config=None, + device='CPU', + run_mode='paddle', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False, + output_dir='output', + threshold=0.5, + save_images=False, + save_mot_txts=False, ): super(JDE_Detector, self).__init__( model_dir=model_dir, device=device, @@ -84,6 +91,8 @@ class JDE_Detector(Detector): enable_mkldnn=enable_mkldnn, output_dir=output_dir, threshold=threshold, ) + self.save_images = save_images + self.save_mot_txts = save_mot_txts assert batch_size == 1, "MOT model only supports batch_size=1." self.det_times = Timer(with_tracker=True) self.num_classes = len(self.pred_config.labels) @@ -91,8 +100,8 @@ class JDE_Detector(Detector): # tracker config assert self.pred_config.tracker, "The exported JDE Detector model should have tracker." 
cfg = self.pred_config.tracker - min_box_area = cfg.get('min_box_area', 200) - vertical_ratio = cfg.get('vertical_ratio', 1.6) + min_box_area = cfg.get('min_box_area', 0.0) + vertical_ratio = cfg.get('vertical_ratio', 0.0) conf_thres = cfg.get('conf_thres', 0.0) tracked_thresh = cfg.get('tracked_thresh', 0.7) metric_type = cfg.get('metric_type', 'euclidean') @@ -115,7 +124,7 @@ class JDE_Detector(Detector): return result def tracking(self, det_results): - pred_dets = det_results['pred_dets'] # 'cls_id, score, x0, y0, x1, y1' + pred_dets = det_results['pred_dets'] # cls_id, score, x0, y0, x1, y1 pred_embs = det_results['pred_embs'] online_targets_dict = self.tracker.update(pred_dets, pred_embs) @@ -164,7 +173,8 @@ class JDE_Detector(Detector): image_list, run_benchmark=False, repeats=1, - visual=True): + visual=True, + seq_name=None): mot_results = [] num_classes = self.num_classes image_list.sort() @@ -225,7 +235,7 @@ class JDE_Detector(Detector): self.det_times.img_num += 1 if visual: - if frame_id % 10 == 0: + if len(image_list) > 1 and frame_id % 10 == 0: print('Tracking frame {}'.format(frame_id)) frame, _ = decode_image(img_file, {}) @@ -237,7 +247,8 @@ class JDE_Detector(Detector): online_scores, frame_id=frame_id, ids2names=ids2names) - seq_name = image_list[0].split('/')[-2] + if seq_name is None: + seq_name = image_list[0].split('/')[-2] save_dir = os.path.join(self.output_dir, seq_name) if not os.path.exists(save_dir): os.makedirs(save_dir) @@ -264,7 +275,8 @@ class JDE_Detector(Detector): if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) out_path = os.path.join(self.output_dir, video_out_name) - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + video_format = 'mp4v' + fourcc = cv2.VideoWriter_fourcc(*video_format) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 1 @@ -282,7 +294,9 @@ class JDE_Detector(Detector): frame_id += 1 timer.tic() - mot_results = self.predict_image([frame], visual=False) + seq_name = video_out_name.split('.')[0] + mot_results = self.predict_image( + [frame], visual=False, seq_name=seq_name) timer.toc() online_tlwhs, online_scores, online_ids = mot_results[0] @@ -307,20 +321,33 @@ class JDE_Detector(Detector): cv2.imshow('Mask Detection', im) if cv2.waitKey(1) & 0xFF == ord('q'): break + + if self.save_mot_txts: + result_filename = os.path.join( + self.output_dir, video_out_name.split('.')[-2] + '.txt') + + write_mot_results(result_filename, results, data_type, num_classes) + writer.release() def main(): detector = JDE_Detector( FLAGS.model_dir, + tracker_config=None, device=FLAGS.device, run_mode=FLAGS.run_mode, + batch_size=1, trt_min_shape=FLAGS.trt_min_shape, trt_max_shape=FLAGS.trt_max_shape, trt_opt_shape=FLAGS.trt_opt_shape, trt_calib_mode=FLAGS.trt_calib_mode, cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn) + enable_mkldnn=FLAGS.enable_mkldnn, + output_dir=FLAGS.output_dir, + threshold=FLAGS.threshold, + save_images=FLAGS.save_images, + save_mot_txts=FLAGS.save_mot_txts) # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py index 3eea4bd6b22c148b09be9cf794116725bf6e89d6..96ed8e399f501cc49ccc2fbcb5109175e90ab755 100644 --- a/deploy/python/mot_keypoint_unite_infer.py +++ b/deploy/python/mot_keypoint_unite_infer.py @@ -24,7 +24,7 @@ from collections import defaultdict from mot_keypoint_unite_utils import argsparser from preprocess import 
decode_image -from infer import print_arguments, get_test_images +from infer import print_arguments, get_test_images, bench_log from mot_sde_infer import SDE_Detector from mot_jde_infer import JDE_Detector, MOT_JDE_SUPPORT_MODELS from keypoint_infer import KeyPointDetector, KEYPOINT_SUPPORT_MODELS @@ -39,7 +39,7 @@ import sys parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) sys.path.insert(0, parent_path) -from pptracking.python.visualize import plot_tracking, plot_tracking_dict +from pptracking.python.mot.visualize import plot_tracking, plot_tracking_dict from pptracking.python.mot.utils import MOTTimer as FPSTimer @@ -92,7 +92,7 @@ def mot_topdown_unite_predict(mot_detector, keypoint_res = predict_with_given_det( image, results, topdown_keypoint_detector, keypoint_batch_size, - FLAGS.mot_threshold, FLAGS.keypoint_threshold, FLAGS.run_benchmark) + FLAGS.run_benchmark) if save_res: store_res.append([ @@ -146,7 +146,7 @@ def mot_topdown_unite_predict_video(mot_detector, if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 0 timer_mot, timer_kp, timer_mot_kp = FPSTimer(), FPSTimer(), FPSTimer() @@ -179,7 +179,7 @@ def mot_topdown_unite_predict_video(mot_detector, timer_kp.tic() keypoint_res = predict_with_given_det( frame, results, topdown_keypoint_detector, keypoint_batch_size, - FLAGS.mot_threshold, FLAGS.keypoint_threshold, FLAGS.run_benchmark) + FLAGS.run_benchmark) timer_kp.toc() timer_mot_kp.toc() diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index 4dd66dda0812d7143bc5f31cb033f8ac8305828c..ad514c08ad7de28f3819f72bf08c65117df99be9 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,15 +23,15 @@ import paddle from benchmark_utils import PaddleInferBenchmark from preprocess import decode_image from utils import argsparser, Timer, get_current_memory_mb -from infer import Detector, get_test_images, print_arguments, bench_log, PredictConfig +from infer import Detector, get_test_images, print_arguments, bench_log, PredictConfig, load_predictor # add python path import sys parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) sys.path.insert(0, parent_path) -from pptracking.python.mot import JDETracker -from pptracking.python.mot.utils import MOTTimer, write_mot_results +from pptracking.python.mot import JDETracker, DeepSORTTracker +from pptracking.python.mot.utils import MOTTimer, write_mot_results, get_crops, clip_box from pptracking.python.mot.visualize import plot_tracking, plot_tracking_dict @@ -50,7 +50,11 @@ class SDE_Detector(Detector): calibration, trt_calib_mode need to set True cpu_threads (int): cpu threads enable_mkldnn (bool): whether to open MKLDNN - use_dark(bool): whether to use postprocess in DarkPose + output_dir (string): The path of output, default as 'output' + threshold (float): Score threshold of the detected bbox, default as 0.5 + save_images (bool): Whether to save visualization image results, default as False + save_mot_txts (bool): Whether to save tracking results (txt), default as False + reid_model_dir (str): reid model dir, default None for ByteTrack, but set for DeepSORT """ def __init__(self, @@ -66,7 +70,10 @@ class SDE_Detector(Detector): cpu_threads=1, enable_mkldnn=False, output_dir='output', - threshold=0.5): + threshold=0.5, + save_images=False, + save_mot_txts=False, + reid_model_dir=None): super(SDE_Detector, self).__init__( model_dir=model_dir, device=device, @@ -80,65 +87,198 @@ class SDE_Detector(Detector): enable_mkldnn=enable_mkldnn, output_dir=output_dir, threshold=threshold, ) + self.save_images = save_images + self.save_mot_txts = save_mot_txts assert batch_size == 1, "MOT model only supports batch_size=1." self.det_times = Timer(with_tracker=True) self.num_classes = len(self.pred_config.labels) - # tracker config + # reid config + self.use_reid = False if reid_model_dir is None else True + if self.use_reid: + self.reid_pred_config = self.set_config(reid_model_dir) + self.reid_predictor, self.config = load_predictor( + reid_model_dir, + run_mode=run_mode, + batch_size=50, # reid_batch_size + min_subgraph_size=self.reid_pred_config.min_subgraph_size, + device=device, + use_dynamic_shape=self.reid_pred_config.use_dynamic_shape, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + else: + self.reid_pred_config = None + self.reid_predictor = None + + assert tracker_config is not None, 'Note that tracker_config should be set.' 
self.tracker_config = tracker_config - cfg = yaml.safe_load(open(self.tracker_config))['tracker'] - min_box_area = cfg.get('min_box_area', 200) - vertical_ratio = cfg.get('vertical_ratio', 1.6) - use_byte = cfg.get('use_byte', True) - match_thres = cfg.get('match_thres', 0.9) - conf_thres = cfg.get('conf_thres', 0.6) - low_conf_thres = cfg.get('low_conf_thres', 0.1) - - self.tracker = JDETracker( - use_byte=use_byte, - num_classes=self.num_classes, - min_box_area=min_box_area, - vertical_ratio=vertical_ratio, - match_thres=match_thres, - conf_thres=conf_thres, - low_conf_thres=low_conf_thres) + tracker_cfg = yaml.safe_load(open(self.tracker_config)) + cfg = tracker_cfg[tracker_cfg['type']] + + # tracker config + self.use_deepsort_tracker = True if tracker_cfg[ + 'type'] == 'DeepSORTTracker' else False + if self.use_deepsort_tracker: + # use DeepSORTTracker + if self.reid_pred_config is not None and hasattr( + self.reid_pred_config, 'tracker'): + cfg = self.reid_pred_config.tracker + budget = cfg.get('budget', 100) + max_age = cfg.get('max_age', 30) + max_iou_distance = cfg.get('max_iou_distance', 0.7) + matching_threshold = cfg.get('matching_threshold', 0.2) + min_box_area = cfg.get('min_box_area', 0) + vertical_ratio = cfg.get('vertical_ratio', 0) + + self.tracker = DeepSORTTracker( + budget=budget, + max_age=max_age, + max_iou_distance=max_iou_distance, + matching_threshold=matching_threshold, + min_box_area=min_box_area, + vertical_ratio=vertical_ratio, ) + else: + # use ByteTracker + use_byte = cfg.get('use_byte', False) + det_thresh = cfg.get('det_thresh', 0.3) + min_box_area = cfg.get('min_box_area', 0) + vertical_ratio = cfg.get('vertical_ratio', 0) + match_thres = cfg.get('match_thres', 0.9) + conf_thres = cfg.get('conf_thres', 0.6) + low_conf_thres = cfg.get('low_conf_thres', 0.1) + + self.tracker = JDETracker( + use_byte=use_byte, + det_thresh=det_thresh, + num_classes=self.num_classes, + min_box_area=min_box_area, + vertical_ratio=vertical_ratio, + match_thres=match_thres, + conf_thres=conf_thres, + low_conf_thres=low_conf_thres, ) + + def postprocess(self, inputs, result): + # postprocess output of predictor + np_boxes_num = result['boxes_num'] + if np_boxes_num[0] <= 0: + print('[WARNNING] No object detected.') + result = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]} + result = {k: v for k, v in result.items() if v is not None} + return result + + def reidprocess(self, det_results, repeats=1): + pred_dets = det_results['boxes'] + pred_xyxys = pred_dets[:, 2:6] + + ori_image = det_results['ori_image'] + ori_image_shape = ori_image.shape[:2] + pred_xyxys, keep_idx = clip_box(pred_xyxys, ori_image_shape) + + if len(keep_idx[0]) == 0: + det_results['boxes'] = np.zeros((1, 6), dtype=np.float32) + det_results['embeddings'] = None + return det_results + + pred_dets = pred_dets[keep_idx[0]] + pred_xyxys = pred_dets[:, 2:6] + + w, h = self.tracker.input_size + crops = get_crops(pred_xyxys, ori_image, w, h) + + # to keep fast speed, only use topk crops + crops = crops[:50] # reid_batch_size + det_results['crops'] = np.array(crops).astype('float32') + det_results['boxes'] = pred_dets[:50] + + input_names = self.reid_predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.reid_predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(det_results[input_names[i]]) + + # model prediction + for i in range(repeats): + self.reid_predictor.run() + output_names = self.reid_predictor.get_output_names() + feature_tensor = 
self.reid_predictor.get_output_handle(output_names[ + 0]) + pred_embs = feature_tensor.copy_to_cpu() + + det_results['embeddings'] = pred_embs + return det_results def tracking(self, det_results): pred_dets = det_results['boxes'] # 'cls_id, score, x0, y0, x1, y1' - pred_embs = None - - online_targets_dict = self.tracker.update(pred_dets, pred_embs) - online_tlwhs = defaultdict(list) - online_scores = defaultdict(list) - online_ids = defaultdict(list) - for cls_id in range(self.num_classes): - online_targets = online_targets_dict[cls_id] + pred_embs = det_results.get('embeddings', None) + + if self.use_deepsort_tracker: + # use DeepSORTTracker, only support singe class + self.tracker.predict() + online_targets = self.tracker.update(pred_dets, pred_embs) + online_tlwhs, online_scores, online_ids = [], [], [] for t in online_targets: - tlwh = t.tlwh - tid = t.track_id - tscore = t.score - if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: + if not t.is_confirmed() or t.time_since_update > 1: continue + tlwh = t.to_tlwh() + tscore = t.score + tid = t.track_id if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ 3] > self.tracker.vertical_ratio: continue - online_tlwhs[cls_id].append(tlwh) - online_ids[cls_id].append(tid) - online_scores[cls_id].append(tscore) - - return online_tlwhs, online_scores, online_ids + online_tlwhs.append(tlwh) + online_scores.append(tscore) + online_ids.append(tid) + + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + } + return tracking_outs + else: + # use ByteTracker, support multiple class + online_tlwhs = defaultdict(list) + online_scores = defaultdict(list) + online_ids = defaultdict(list) + online_targets_dict = self.tracker.update(pred_dets, pred_embs) + for cls_id in range(self.num_classes): + online_targets = online_targets_dict[cls_id] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + tscore = t.score + if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: + continue + if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > self.tracker.vertical_ratio: + continue + online_tlwhs[cls_id].append(tlwh) + online_ids[cls_id].append(tid) + online_scores[cls_id].append(tscore) + + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + } + return tracking_outs def predict_image(self, image_list, run_benchmark=False, repeats=1, - visual=True): - mot_results = [] + visual=True, + seq_name=None): num_classes = self.num_classes image_list.sort() ids2names = self.pred_config.labels + mot_results = [] for frame_id, img_file in enumerate(image_list): batch_image_list = [img_file] # bs=1 in MOT model + frame, _ = decode_image(img_file, {}) if run_benchmark: # preprocess inputs = self.preprocess(batch_image_list) # warmup @@ -159,10 +299,16 @@ class SDE_Detector(Detector): self.det_times.postprocess_time_s.end() # tracking + if self.use_reid: + det_result['frame_id'] = frame_id + det_result['seq_name'] = seq_name + det_result['ori_image'] = frame + det_result = self.reidprocess(det_result) result_warmup = self.tracking(det_result) self.det_times.tracking_time_s.start() - online_tlwhs, online_scores, online_ids = self.tracking( - det_result) + if self.use_reid: + det_result = self.reidprocess(det_result) + tracking_outs = self.tracking(det_result) self.det_times.tracking_time_s.end() self.det_times.img_num += 1 @@ -186,32 +332,47 @@ class SDE_Detector(Detector): # tracking process self.det_times.tracking_time_s.start() - online_tlwhs, 
online_scores, online_ids = self.tracking( - det_result) + if self.use_reid: + det_result['frame_id'] = frame_id + det_result['seq_name'] = seq_name + det_result['ori_image'] = frame + det_result = self.reidprocess(det_result) + tracking_outs = self.tracking(det_result) self.det_times.tracking_time_s.end() self.det_times.img_num += 1 + online_tlwhs = tracking_outs['online_tlwhs'] + online_scores = tracking_outs['online_scores'] + online_ids = tracking_outs['online_ids'] + + mot_results.append([online_tlwhs, online_scores, online_ids]) + if visual: - if frame_id % 10 == 0: + if len(image_list) > 1 and frame_id % 10 == 0: print('Tracking frame {}'.format(frame_id)) frame, _ = decode_image(img_file, {}) - - im = plot_tracking_dict( - frame, - num_classes, - online_tlwhs, - online_ids, - online_scores, - frame_id=frame_id, - ids2names=[]) - seq_name = image_list[0].split('/')[-2] + if isinstance(online_tlwhs, defaultdict): + im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + ids2names=[]) + else: + im = plot_tracking( + frame, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id) save_dir = os.path.join(self.output_dir, seq_name) if not os.path.exists(save_dir): os.makedirs(save_dir) cv2.imwrite( os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) - mot_results.append([online_tlwhs, online_scores, online_ids]) return mot_results def predict_video(self, video_file, camera_id): @@ -231,13 +392,17 @@ class SDE_Detector(Detector): if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) out_path = os.path.join(self.output_dir, video_out_name) - fourcc = cv2.VideoWriter_fourcc(* 'mp4v') + video_format = 'mp4v' + fourcc = cv2.VideoWriter_fourcc(*video_format) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) frame_id = 1 timer = MOTTimer() - results = defaultdict(list) # support single class and multi classes + results = defaultdict(list) num_classes = self.num_classes + data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = self.pred_config.labels + while (1): ret, frame = capture.read() if not ret: @@ -247,31 +412,53 @@ class SDE_Detector(Detector): frame_id += 1 timer.tic() - mot_results = self.predict_image([frame], visual=False) + seq_name = video_out_name.split('.')[0] + mot_results = self.predict_image( + [frame], visual=False, seq_name=seq_name) timer.toc() + # bs=1 in MOT model online_tlwhs, online_scores, online_ids = mot_results[0] - for cls_id in range(num_classes): - results[cls_id].append( - (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id], - online_ids[cls_id])) fps = 1. 
/ timer.duration - im = plot_tracking_dict( - frame, - num_classes, - online_tlwhs, - online_ids, - online_scores, - frame_id=frame_id, - fps=fps, - ids2names=[]) + if self.use_deepsort_tracker: + # use DeepSORTTracker, only support singe class + results[0].append( + (frame_id + 1, online_tlwhs, online_scores, online_ids)) + im = plot_tracking( + frame, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=fps) + else: + # use ByteTracker, support multiple class + for cls_id in range(num_classes): + results[cls_id].append( + (frame_id + 1, online_tlwhs[cls_id], + online_scores[cls_id], online_ids[cls_id])) + im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=fps, + ids2names=ids2names) writer.write(im) if camera_id != -1: cv2.imshow('Mask Detection', im) if cv2.waitKey(1) & 0xFF == ord('q'): break + + if self.save_mot_txts: + result_filename = os.path.join( + self.output_dir, video_out_name.split('.')[-2] + '.txt') + write_mot_results(result_filename, results) + writer.release() @@ -282,18 +469,20 @@ def main(): arch = yml_conf['arch'] detector = SDE_Detector( FLAGS.model_dir, - FLAGS.tracker_config, + tracker_config=FLAGS.tracker_config, device=FLAGS.device, run_mode=FLAGS.run_mode, - batch_size=FLAGS.batch_size, + batch_size=1, trt_min_shape=FLAGS.trt_min_shape, trt_max_shape=FLAGS.trt_max_shape, trt_opt_shape=FLAGS.trt_opt_shape, trt_calib_mode=FLAGS.trt_calib_mode, cpu_threads=FLAGS.cpu_threads, enable_mkldnn=FLAGS.enable_mkldnn, + output_dir=FLAGS.output_dir, threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir) + save_images=FLAGS.save_images, + save_mot_txts=FLAGS.save_mot_txts, ) # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: @@ -303,7 +492,9 @@ def main(): if FLAGS.image_dir is None and FLAGS.image_file is not None: assert FLAGS.batch_size == 1, "--batch_size should be 1 in MOT models." img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10) + seq_name = FLAGS.image_dir.split('/')[-1] + detector.predict_image( + img_list, FLAGS.run_benchmark, repeats=10, seq_name=seq_name) if not FLAGS.run_benchmark: detector.det_times.info(average=True) diff --git a/deploy/python/tracker_config.yml b/deploy/python/tracker_config.yml index d92510148ec175d9dd7c19fd191e43f13cebe2ce..ddd55e8653870ed9bdfe9734995e8af5b56f49e2 100644 --- a/deploy/python/tracker_config.yml +++ b/deploy/python/tracker_config.yml @@ -1,10 +1,26 @@ -# config of tracker for MOT SDE Detector, use ByteTracker as default. -# The tracker of MOT JDE Detector is exported together with the model. +# config of tracker for MOT SDE Detector, use 'JDETracker' as default. +# The tracker of MOT JDE Detector (such as FairMOT) is exported together with the model. # Here 'min_box_area' and 'vertical_ratio' are set for pedestrian, you can modify for other objects tracking. 
-tracker: - use_byte: true + +type: JDETracker # 'JDETracker' or 'DeepSORTTracker' + +# BYTETracker +JDETracker: + use_byte: True + det_thresh: 0.3 conf_thres: 0.6 low_conf_thres: 0.1 match_thres: 0.9 - min_box_area: 100 - vertical_ratio: 1.6 + min_box_area: 0 + vertical_ratio: 0 # 1.6 for pedestrian + +DeepSORTTracker: + input_size: [64, 192] + min_box_area: 0 + vertical_ratio: -1 + budget: 100 + max_age: 70 + n_init: 3 + metric_type: cosine + matching_threshold: 0.2 + max_iou_distance: 0.9 diff --git a/ppdet/modeling/mot/tracker/jde_tracker.py b/ppdet/modeling/mot/tracker/jde_tracker.py index 5d2dcb9a018e3c0a26af837c9abd0a965fdbc7df..a00174ff3d4575ea42feb2b4f2435522f23f0b25 100644 --- a/ppdet/modeling/mot/tracker/jde_tracker.py +++ b/ppdet/modeling/mot/tracker/jde_tracker.py @@ -44,7 +44,7 @@ class JDETracker(object): track_buffer (int): buffer for tracker min_box_area (int): min box area to filter out low quality boxes vertical_ratio (float): w/h, the vertical ratio of the bbox to filter - bad results. If set <0 means no need to filter bboxes,usually set + bad results. If set <= 0 means no need to filter bboxes,usually set 1.6 for pedestrian tracking. tracked_thresh (float): linear assignment threshold of tracked stracks and detections @@ -70,8 +70,8 @@ class JDETracker(object): num_classes=1, det_thresh=0.3, track_buffer=30, - min_box_area=200, - vertical_ratio=1.6, + min_box_area=0, + vertical_ratio=0, tracked_thresh=0.7, r_tracked_thresh=0.5, unconfirmed_thresh=0.7, @@ -167,9 +167,8 @@ class JDETracker(object): detections = [ STrack( STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id, - 30, temp_feat) - for (tlbrs, temp_feat - ) in zip(pred_dets_cls, pred_embs_cls) + 30, temp_feat) for (tlbrs, temp_feat) in + zip(pred_dets_cls, pred_embs_cls) ] else: detections = [] @@ -244,15 +243,13 @@ class JDETracker(object): for tlbrs in pred_dets_cls_second ] else: - pred_embs_cls_second = pred_embs_dict[cls_id][inds_second] + pred_embs_cls_second = pred_embs_dict[cls_id][ + inds_second] detections_second = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[2:6]), - tlbrs[1], - cls_id, - 30, - temp_feat) - for (tlbrs, temp_feat) in zip(pred_dets_cls_second, pred_embs_cls_second) + STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], + cls_id, 30, temp_feat) for (tlbrs, temp_feat) in + zip(pred_dets_cls_second, pred_embs_cls_second) ] else: detections_second = []
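Note on the new tracker selection: `deploy/python/tracker_config.yml` now picks the tracker through its top-level `type` key (`JDETracker` for ByteTrack-style association, `DeepSORTTracker` when a ReID model dir is supplied), and `SDE_Detector` builds the tracker from the matching sub-section. Below is a minimal standalone sketch of that selection logic using the same keys and defaults that appear in this patch; it omits the override from an exported ReID model's config for brevity, the helper name `build_tracker` is illustrative, and `pptracking` is assumed importable as in the deploy scripts.

```python
# Minimal sketch (not part of the patch) of the config-driven tracker
# selection introduced above. Assumes it runs from deploy/python so that
# `pptracking` is importable, as in mot_sde_infer.py.
import yaml

from pptracking.python.mot import JDETracker, DeepSORTTracker


def build_tracker(tracker_config, num_classes=1):
    # tracker_config: path to tracker_config.yml with a top-level 'type' key
    tracker_cfg = yaml.safe_load(open(tracker_config))
    cfg = tracker_cfg[tracker_cfg['type']]  # 'JDETracker' or 'DeepSORTTracker'

    if tracker_cfg['type'] == 'DeepSORTTracker':
        # DeepSORT path: needs ReID embeddings, single-class tracking only.
        return DeepSORTTracker(
            budget=cfg.get('budget', 100),
            max_age=cfg.get('max_age', 30),
            max_iou_distance=cfg.get('max_iou_distance', 0.7),
            matching_threshold=cfg.get('matching_threshold', 0.2),
            min_box_area=cfg.get('min_box_area', 0),
            vertical_ratio=cfg.get('vertical_ratio', 0))

    # Default path: ByteTrack-style JDETracker, supports multiple classes.
    return JDETracker(
        use_byte=cfg.get('use_byte', False),
        det_thresh=cfg.get('det_thresh', 0.3),
        num_classes=num_classes,
        min_box_area=cfg.get('min_box_area', 0),
        vertical_ratio=cfg.get('vertical_ratio', 0),
        match_thres=cfg.get('match_thres', 0.9),
        conf_thres=cfg.get('conf_thres', 0.6),
        low_conf_thres=cfg.get('low_conf_thres', 0.1))


# Usage (hypothetical path):
# tracker = build_tracker('deploy/python/tracker_config.yml', num_classes=1)
```

With `min_box_area: 0` and `vertical_ratio: 0` as the new defaults, no size or aspect-ratio filtering is applied; set them to e.g. 200 and 1.6 to reproduce the previous pedestrian-oriented behavior.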