From dc6ba2ac4ee5e60f265d6a601545ccbc10fd0fef Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Fri, 28 Jan 2022 17:37:51 +0800 Subject: [PATCH] [MOT] add mcfairmot vehicle models (#5163) * add mcfairmot 4 class vehicle with bytetracker * update modelzoo, test=document_fix --- configs/mot/mcfairmot/README.md | 18 ++++- configs/mot/mcfairmot/README_cn.md | 19 ++++- ..._1088x608_visdrone_vehicle_bytetracker.yml | 69 +++++++++++++++++ ..._1088x608_visdrone_vehicle_bytetracker.yml | 77 +++++++++++++++++++ ppdet/data/source/mot.py | 4 + 5 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml create mode 100644 configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml diff --git a/configs/mot/mcfairmot/README.md b/configs/mot/mcfairmot/README.md index 985a6ecd1..1e63acce6 100644 --- a/configs/mot/mcfairmot/README.md +++ b/configs/mot/mcfairmot/README.md @@ -31,9 +31,18 @@ PP-tracking provides an AI studio public project tutorial. Please refer to this | HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **Notes:** - - MOTA is the average MOTA of 10 catecories in the VisDrone2019 MOT dataset, and its value is also equal to the average MOTA of all the evaluated video sequences. + - MOTA is the average MOTA of 10 catecories in the VisDrone2019 MOT dataset, and its value is also equal to the average MOTA of all the evaluated video sequences. Here we provide the download [link](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot.zip) of the dataset. - MCFairMOT used 4 GPUs for training 30 epoches. The batch size is 6 on each GPU for MCFairMOT DLA-34, and 8 for MCFairMOT HRNetV2-W18. 
+### MCFairMOT Results on VisDrone Vehicle Val Set
+| backbone       | input shape | MOTA   | IDF1   |  IDS  |   FPS   |  download | config |
+| :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: |
+| DLA-34         | 1088x608 |  37.7  |  56.8  |  199  |    -     |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [config](./mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml) |
+| HRNetV2-W18    | 1088x608 |  35.6  |  56.3  |  190  |    -     |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml) |
+
+**Notes:**
+ - MOTA is the average MOTA of 4 categories in the VisDrone Vehicle dataset. This dataset is extracted from the VisDrone2019 MOT dataset. Here we provide the download [link](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot_vehicle.zip).
+ - The tracker used in the MCFairMOT model here is ByteTracker.
## Getting Start @@ -106,4 +115,11 @@ python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/mc pages={1-1}, doi={10.1109/TPAMI.2021.3119563} } + +@article{zhang2021bytetrack, + title={ByteTrack: Multi-Object Tracking by Associating Every Detection Box}, + author={Zhang, Yifu and Sun, Peize and Jiang, Yi and Yu, Dongdong and Yuan, Zehuan and Luo, Ping and Liu, Wenyu and Wang, Xinggang}, + journal={arXiv preprint arXiv:2110.06864}, + year={2021} +} ``` diff --git a/configs/mot/mcfairmot/README_cn.md b/configs/mot/mcfairmot/README_cn.md index e0309e0fb..c2f60fee1 100644 --- a/configs/mot/mcfairmot/README_cn.md +++ b/configs/mot/mcfairmot/README_cn.md @@ -30,9 +30,19 @@ PP-Tracking 提供了AI Studio公开项目案例,教程请参考[PP-Tracking | HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **注意:** - - MOTA是VisDrone2019 MOT数据集10类目标的平均MOTA, 其值也等于所有评估的视频序列的平均MOTA。 + - MOTA是VisDrone2019 MOT数据集10类目标的平均MOTA, 其值也等于所有评估的视频序列的平均MOTA,此处提供数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot.zip)。 - MCFairMOT模型均使用4个GPU进行训练,训练30个epoch。DLA-34骨干网络的每个GPU上batch size为6,HRNetV2-W18骨干网络的每个GPU上batch size为8。 +### MCFairMOT 在VisDrone Vehicle val-set上结果 +| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FPS | 下载链接 | 配置文件 | +| :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: | +| DLA-34 | 1088x608 | 37.7 | 56.8 | 199 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [配置文件](./mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml) | +| HRNetV2-W18 | 1088x608 | 35.6 | 56.3 | 190 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml) | + 
+**注意:** + - MOTA是VisDrone Vehicle数据集4类车辆目标的平均MOTA, 该数据集是VisDrone数据集中抽出4类车辆类别组成的,此处提供数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot_vehicle.zip)。 + - MCFairMOT模型此处使用的跟踪器是使用的ByteTracker。 + ## 快速开始 ### 1. 训练 @@ -103,4 +113,11 @@ python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/mc pages={1-1}, doi={10.1109/TPAMI.2021.3119563} } + +@article{zhang2021bytetrack, + title={ByteTrack: Multi-Object Tracking by Associating Every Detection Box}, + author={Zhang, Yifu and Sun, Peize and Jiang, Yi and Yu, Dongdong and Yuan, Zehuan and Luo, Ping and Liu, Wenyu and Wang, Xinggang}, + journal={arXiv preprint arXiv:2110.06864}, + year={2021} +} ``` diff --git a/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml b/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml new file mode 100644 index 000000000..6118f053c --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml @@ -0,0 +1,69 @@ +_BASE_: [ + '../fairmot/fairmot_dla34_30e_1088x608.yml', + '../../datasets/mcmot.yml' +] +metric: MCMOT +num_classes: 4 + +# for MCMOT training +TrainDataset: + !MCMOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_mcmot_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + label_list: label_list.txt + +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_mcmot_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + +# for MCMOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + + +pretrain_weights: https://paddledet.bj.bcebos.com/models/centernet_dla34_140e_coco.pdparams + +FairMOT: + detector: 
CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + for_mot: True + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + use_byte: True + match_thres: 0.8 + conf_thres: 0.4 + low_conf_thres: 0.2 + +weights: output/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker/model_final + +epoch: 30 +LearningRate: + base_lr: 0.0005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [10, 20] + use_warmup: False + +OptimizerBuilder: + optimizer: + type: Adam + regularizer: NULL diff --git a/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml new file mode 100644 index 000000000..815bc2290 --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml @@ -0,0 +1,77 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml', + '../../datasets/mcmot.yml' +] +metric: MCMOT +num_classes: 4 + +# for MCMOT training +TrainDataset: + !MCMOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_mcmot_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + label_list: label_list.txt + +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_mcmot_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + +# for MCMOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + + +architecture: FairMOT +pretrain_weights: 
https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams +for_mot: True + +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + use_byte: True + match_thres: 0.8 + conf_thres: 0.4 + low_conf_thres: 0.2 + +weights: output/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker/model_final + +epoch: 30 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [15, 22] + use_warmup: True + - !BurninWarmup + steps: 1000 + +OptimizerBuilder: + optimizer: + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +TrainReader: + batch_size: 8 diff --git a/ppdet/data/source/mot.py b/ppdet/data/source/mot.py index d46c02f52..1baadf570 100644 --- a/ppdet/data/source/mot.py +++ b/ppdet/data/source/mot.py @@ -472,6 +472,7 @@ class MOTImageFolder(DetDataset): image_dir=None, sample_num=-1, keep_ori_im=False, + anno_path=None, **kwargs): super(MOTImageFolder, self).__init__( dataset_dir, image_dir, sample_num=sample_num) @@ -481,6 +482,7 @@ class MOTImageFolder(DetDataset): self._imid2path = {} self.roidbs = None self.frame_rate = frame_rate + self.anno_path = anno_path def check_or_download_dataset(self): return @@ -571,6 +573,8 @@ class MOTImageFolder(DetDataset): "wrong or unsupported file format: {}".format(self.video_file) self.roidbs = self._load_video_images() + def get_anno(self): + return self.anno_path def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')): return f.lower().endswith(extensions) -- GitLab