diff --git a/configs/mot/mcfairmot/README.md b/configs/mot/mcfairmot/README.md index 985a6ecd18842ab1b84d2648d94d817ff930ab9e..1e63acce691970be860a64a42748bb9c37427d4a 100644 --- a/configs/mot/mcfairmot/README.md +++ b/configs/mot/mcfairmot/README.md @@ -31,9 +31,18 @@ PP-tracking provides an AI studio public project tutorial. Please refer to this | HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **Notes:** - - MOTA is the average MOTA of 10 catecories in the VisDrone2019 MOT dataset, and its value is also equal to the average MOTA of all the evaluated video sequences. + - MOTA is the average MOTA of 10 catecories in the VisDrone2019 MOT dataset, and its value is also equal to the average MOTA of all the evaluated video sequences. Here we provide the download [link](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot.zip) of the dataset. - MCFairMOT used 4 GPUs for training 30 epoches. The batch size is 6 on each GPU for MCFairMOT DLA-34, and 8 for MCFairMOT HRNetV2-W18. 
+### MCFairMOT Results on VisDrone Vehicle Val Set +| backbone | input shape | MOTA | IDF1 | IDS | FPS | download | config | +| :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: | +| DLA-34 | 1088x608 | 37.7 | 56.8 | 199 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [config](./mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml) | +| HRNetV2-W18 | 1088x608 | 35.6 | 56.3 | 190 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml) | + +**Notes:** + - MOTA is the average MOTA of 4 categories in the VisDrone Vehicle dataset, and this dataset is extracted from the VisDrone2019 MOT dataset; here we provide the download [link](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot_vehicle.zip). + - The tracker used in the MCFairMOT model here is ByteTracker. 
## Getting Start @@ -106,4 +115,11 @@ python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/mc pages={1-1}, doi={10.1109/TPAMI.2021.3119563} } + +@article{zhang2021bytetrack, + title={ByteTrack: Multi-Object Tracking by Associating Every Detection Box}, + author={Zhang, Yifu and Sun, Peize and Jiang, Yi and Yu, Dongdong and Yuan, Zehuan and Luo, Ping and Liu, Wenyu and Wang, Xinggang}, + journal={arXiv preprint arXiv:2110.06864}, + year={2021} +} ``` diff --git a/configs/mot/mcfairmot/README_cn.md b/configs/mot/mcfairmot/README_cn.md index e0309e0fb55e9ed75461a756bb78f78fcf4675c1..c2f60fee161191193e5a2002d948290e26e25d05 100644 --- a/configs/mot/mcfairmot/README_cn.md +++ b/configs/mot/mcfairmot/README_cn.md @@ -30,9 +30,19 @@ PP-Tracking 提供了AI Studio公开项目案例,教程请参考[PP-Tracking | HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **注意:** - - MOTA是VisDrone2019 MOT数据集10类目标的平均MOTA, 其值也等于所有评估的视频序列的平均MOTA。 + - MOTA是VisDrone2019 MOT数据集10类目标的平均MOTA, 其值也等于所有评估的视频序列的平均MOTA,此处提供数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot.zip)。 - MCFairMOT模型均使用4个GPU进行训练,训练30个epoch。DLA-34骨干网络的每个GPU上batch size为6,HRNetV2-W18骨干网络的每个GPU上batch size为8。 +### MCFairMOT 在VisDrone Vehicle val-set上结果 +| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FPS | 下载链接 | 配置文件 | +| :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: | +| DLA-34 | 1088x608 | 37.7 | 56.8 | 199 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | [配置文件](./mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml) | +| HRNetV2-W18 | 1088x608 | 35.6 | 56.3 | 190 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.pdparams) | 
[配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml) | + +**注意:** + - MOTA是VisDrone Vehicle数据集4类车辆目标的平均MOTA, 该数据集是VisDrone数据集中抽出4类车辆类别组成的,此处提供数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/mot/visdrone_mcmot_vehicle.zip)。 + - MCFairMOT模型此处使用的跟踪器是使用的ByteTracker。 + ## 快速开始 ### 1. 训练 @@ -103,4 +113,11 @@ python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/mc pages={1-1}, doi={10.1109/TPAMI.2021.3119563} } + +@article{zhang2021bytetrack, + title={ByteTrack: Multi-Object Tracking by Associating Every Detection Box}, + author={Zhang, Yifu and Sun, Peize and Jiang, Yi and Yu, Dongdong and Yuan, Zehuan and Luo, Ping and Liu, Wenyu and Wang, Xinggang}, + journal={arXiv preprint arXiv:2110.06864}, + year={2021} +} ``` diff --git a/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml b/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml new file mode 100644 index 0000000000000000000000000000000000000000..6118f053c43aa043a00d1310c00ac0a68018d6bb --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker.yml @@ -0,0 +1,69 @@ +_BASE_: [ + '../fairmot/fairmot_dla34_30e_1088x608.yml', + '../../datasets/mcmot.yml' +] +metric: MCMOT +num_classes: 4 + +# for MCMOT training +TrainDataset: + !MCMOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_mcmot_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + label_list: label_list.txt + +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_mcmot_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + +# for MCMOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + anno_path: 
dataset/mot/visdrone_mcmot_vehicle/label_list.txt + + +pretrain_weights: https://paddledet.bj.bcebos.com/models/centernet_dla34_140e_coco.pdparams + +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + for_mot: True + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + use_byte: True + match_thres: 0.8 + conf_thres: 0.4 + low_conf_thres: 0.2 + +weights: output/mcfairmot_dla34_30e_1088x608_visdrone_vehicle_bytetracker/model_final + +epoch: 30 +LearningRate: + base_lr: 0.0005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [10, 20] + use_warmup: False + +OptimizerBuilder: + optimizer: + type: Adam + regularizer: NULL diff --git a/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml new file mode 100644 index 0000000000000000000000000000000000000000..815bc22909b31fada1c52acdc2dcc0f2481de57e --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker.yml @@ -0,0 +1,77 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml', + '../../datasets/mcmot.yml' +] +metric: MCMOT +num_classes: 4 + +# for MCMOT training +TrainDataset: + !MCMOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_mcmot_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + label_list: label_list.txt + +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_mcmot_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + +# for MCMOT video inference +TestMOTDataset: + !MOTImageFolder + 
dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + anno_path: dataset/mot/visdrone_mcmot_vehicle/label_list.txt + + +architecture: FairMOT +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams +for_mot: True + +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + use_byte: True + match_thres: 0.8 + conf_thres: 0.4 + low_conf_thres: 0.2 + +weights: output/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle_bytetracker/model_final + +epoch: 30 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [15, 22] + use_warmup: True + - !BurninWarmup + steps: 1000 + +OptimizerBuilder: + optimizer: + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +TrainReader: + batch_size: 8 diff --git a/ppdet/data/source/mot.py b/ppdet/data/source/mot.py index d46c02f5292a5478d0930fe995b8d9a0a9d715ce..1baadf570d13afe6f9c648fdff755ac314d1aa35 100644 --- a/ppdet/data/source/mot.py +++ b/ppdet/data/source/mot.py @@ -472,6 +472,7 @@ class MOTImageFolder(DetDataset): image_dir=None, sample_num=-1, keep_ori_im=False, + anno_path=None, **kwargs): super(MOTImageFolder, self).__init__( dataset_dir, image_dir, sample_num=sample_num) @@ -481,6 +482,7 @@ class MOTImageFolder(DetDataset): self._imid2path = {} self.roidbs = None self.frame_rate = frame_rate + self.anno_path = anno_path def check_or_download_dataset(self): return @@ -571,6 +573,8 @@ class MOTImageFolder(DetDataset): "wrong or unsupported file format: {}".format(self.video_file) self.roidbs = self._load_video_images() + def get_anno(self): + return self.anno_path def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', 
'.rmvb', 'flv')): return f.lower().endswith(extensions)