From 570ec45e3e00042a8a5aed81329767d91225a446 Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Tue, 23 Nov 2021 15:40:04 +0800 Subject: [PATCH] cherry-pick MOT (#4668) * cherry-pick cfg modelzoo readme * cherry pick modeling engine source * cherry-pick deploy python mot --- configs/mot/README.md | 251 ++--------------- configs/mot/README_cn.md | 253 ++---------------- configs/mot/deepsort/README_cn.md | 2 +- configs/mot/fairmot/README.md | 9 + configs/mot/fairmot/README_cn.md | 9 + .../fairmot/_base_/optimizer_30e_momentum.yml | 1 - .../fairmot_enhance_dla34_60e_1088x608.yml | 7 + ...airmot_hrnetv2_w18_dlafpn_30e_1088x608.yml | 2 +- ...fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml | 2 +- ...fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml | 2 +- configs/mot/mcfairmot/README.md | 2 + configs/mot/mcfairmot/README_cn.md | 2 + ...rnetv2_w18_dlafpn_30e_576x320_visdrone.yml | 47 ++++ ...rnetv2_w18_dlafpn_30e_864x480_visdrone.yml | 47 ++++ configs/mot/mtmct/README.md | 1 + configs/mot/mtmct/README_cn.md | 113 ++++++++ configs/mot/pedestrian/README_cn.md | 11 +- ...lafpn_30e_1088x608_visdrone_pedestrian.yml | 26 ++ ...dlafpn_30e_576x320_visdrone_pedestrian.yml | 26 ++ ...dlafpn_30e_864x480_visdrone_pedestrian.yml | 26 ++ configs/mot/vehicle/README_cn.md | 14 +- ...8_dlafpn_30e_1088x608_visdrone_vehicle.yml | 40 +++ ..._dlafpn_30e_576x320_bdd100kmot_vehicle.yml | 40 +++ ...18_dlafpn_30e_576x320_visdrone_vehicle.yml | 40 +++ ...18_dlafpn_30e_864x480_visdrone_vehicle.yml | 40 +++ deploy/python/infer.py | 4 +- deploy/python/mot_jde_infer.py | 11 +- deploy/python/mot_sde_infer.py | 180 +++++++++++-- ppdet/data/source/mot.py | 23 +- ppdet/engine/tracker.py | 113 +++++--- ppdet/modeling/architectures/jde.py | 37 +-- .../modeling/mot/tracker/base_sde_tracker.py | 2 + ppdet/modeling/mot/utils.py | 69 +++-- ppdet/modeling/mot/visualization.py | 26 +- ppdet/modeling/necks/centernet_fpn.py | 2 + ppdet/modeling/post_process.py | 11 +- ppdet/modeling/reid/fairmot_embedding_head.py | 28 +- ppdet/modeling/reid/jde_embedding_head.py | 54 +++- ppdet/modeling/reid/pyramidal_embedding.py | 3 +- tools/infer_mot.py | 2 +- 40 files changed, 930 insertions(+), 648 deletions(-) create mode 100644 configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml create mode 100644 configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml create mode 120000 configs/mot/mtmct/README.md create mode 100644 configs/mot/mtmct/README_cn.md create mode 100755 configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml create mode 100755 configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.yml create mode 100755 configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml create mode 100755 configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.yml create mode 100755 configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml create mode 100755 configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml create mode 100755 configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.yml diff --git a/configs/mot/README.md b/configs/mot/README.md index e06405045..5f25ef6e9 100644 --- a/configs/mot/README.md +++ b/configs/mot/README.md @@ -6,9 +6,7 @@ English | [简体中文](README_cn.md) - [Introduction](#Introduction) - [Installation](#Installation) - [Model Zoo](#Model_Zoo) -- [Feature Tracking Model](#Feature_Tracking_Model) - [Dataset 
Preparation](#Dataset_Preparation)
-- [Getting Start](#Getting_Start)
- [Citations](#Citations)

## Introduction

@@ -26,8 +24,15 @@ PaddleDetection implements three MOT algorithms of these two series.

- [FairMOT](https://arxiv.org/abs/2004.01888) is based on the anchor-free detector CenterNet, which overcomes the problem of anchor and feature misalignment in anchor-based detection frameworks. The fusion of deep and shallow features enables the detection and ReID tasks to obtain the features they each need, and low-dimensional ReID features are used. FairMOT is a simple baseline composed of two homogeneous branches that predict pixel-level object scores and ReID features. It achieves fairness between the two tasks and obtains a higher level of real-time MOT performance.
-
-
+[PP-Tracking](../../deploy/pptracking/README.md) is the first open-source real-time tracking system based on the PaddlePaddle deep learning framework. Targeting the difficulties and pain points of real-world business scenarios, PP-Tracking provides built-in capabilities and industrial applications such as pedestrian and vehicle tracking, cross-camera tracking, multi-class tracking, small-object tracking and traffic counting, along with a visual development interface. It integrates multi-object tracking, object detection and lightweight ReID algorithms to further improve server-side deployment performance, and it supports Python and C++ deployment on multiple platforms including Linux and NVIDIA Jetson.
+
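As a quick illustration of the Python deployment path mentioned above, a minimal sketch is shown below; it runs an exported FairMOT model with the `deploy/python/mot_jde_infer.py` script documented later in this patch, and the model directory and video name are placeholders.

```bash
# Minimal sketch: track a video with an already-exported FairMOT model (paths are placeholders).
python deploy/python/mot_jde_infer.py \
    --model_dir=output_inference/fairmot_dla34_30e_1088x608 \
    --video_file={your video name}.mp4 \
    --device=GPU \
    --save_mot_txts   # add --save_images to also save visualization images
```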
+ +
+ +
+ +
+ video source:VisDrone2021, BDD100K dataset
@@ -46,161 +51,19 @@ pip install -r requirements.txt ## Model Zoo -### DeepSORT Results on MOT-16 Training Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | det result/model |ReID model| config | -| :---------| :------- | :----: | :----: | :--: | :----: | :---: | :---: | :---: | :---: | :---: | -| ResNet-101 | 1088x608 | 72.2 | 60.5 | 998 | 8054 | 21644 | - | [det result](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[config](./deepsort/reid/deepsort_pcb_pyramid_r101.yml) | -| ResNet-101 | 1088x608 | 68.3 | 56.5 | 1722 | 17337 | 15890 | - | [det model](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[config](./deepsort/deepsort_jde_yolov3_pcb_pyramid.yml) | -| PPLCNet | 1088x608 | 72.2 | 59.5 | 1087 | 8034 | 21481 | - | [det result](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[config](./deepsort/reid/deepsort_pplcnet.yml) | -| PPLCNet | 1088x608 | 68.1 | 53.6 | 1979 | 17446 | 15766 | - | [det model](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[config](./deepsort/deepsort_jde_yolov3_pplcnet.yml) | - -### DeepSORT Results on MOT-16 Test Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | det result/model |ReID model| config | -| :---------| :------- | :----: | :----: | :--: | :----: | :---: | :---: | :---: | :---: | :---: | -| ResNet-101 | 1088x608 | 64.1 | 53.0 | 1024 | 12457 | 51919 | - | [det result](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) | [ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[config](./deepsort/reid/deepsort_pcb_pyramid_r101.yml) | -| ResNet-101 | 1088x608 | 61.2 | 48.5 | 1799 | 25796 | 43232 | - | [det model](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[config](./deepsort/deepsort_jde_yolov3_pcb_pyramid.yml) | -| PPLCNet | 1088x608 | 64.0 | 51.3 | 1208 | 12697 | 51784 | - | [det result](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[config](./deepsort/reid/deepsort_pplcnet.yml) | -| PPLCNet | 1088x608 | 61.1 | 48.8 | 2010 | 25401 | 43432 | - | [det model](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID model](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[config](./deepsort/deepsort_jde_yolov3_pplcnet.yml) | - -**Notes:** -DeepSORT does not need to train on MOT dataset, only used for evaluation. Now it supports two evaluation methods. -- 1.Load the result file and the ReID model. 
Before DeepSORT evaluation, you should get detection results by a detection model first, and then prepare them like this: -``` -det_results_dir - |——————MOT16-02.txt - |——————MOT16-04.txt - |——————MOT16-05.txt - |——————MOT16-09.txt - |——————MOT16-10.txt - |——————MOT16-11.txt - |——————MOT16-13.txt -``` -For MOT16 dataset, you can download a detection result after matching called det_results_dir.zip provided by PaddleDetection: -``` -wget https://dataset.bj.bcebos.com/mot/det_results_dir.zip -``` -If you use a stronger detection model, you can get better results. Each txt is the detection result of all the pictures extracted from each video, and each line describes a bounding box with the following format: -``` -[frame_id],[x0],[y0],[w],[h],[score],[class_id] -``` -- `frame_id` is the frame number of the image. -- `x0,y0` is the X and Y coordinates of the left bound of the object box. -- `w,h` is the pixel width and height of the object box. -- `score` is the confidence score of the object box. -- `class_id` is the category of the object box, set `0` if only has one category. - -- 2.Load the detection model and the ReID model at the same time. Here, the JDE version of YOLOv3 is selected. For more detail of configuration, see `configs/mot/deepsort/deepsort_jde_yolov3_pcb_pyramid.yml` and `configs/mot/deepsort/deepsort_ppyolov2_pplcnet.yml` for other general detectors. - - -### JDE Results on MOT-16 Training Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :----------------- | :------- | :----: | :----: | :---: | :----: | :---: | :---: | :---: | :---: | -| DarkNet53 | 1088x608 | 72.0 | 66.9 | 1397 | 7274 | 22209 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_1088x608.pdparams) | [config](./jde/jde_darknet53_30e_1088x608.yml) | -| DarkNet53 | 864x480 | 69.1 | 64.7 | 1539 | 7544 | 25046 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_864x480.pdparams) | [config](./jde/jde_darknet53_30e_864x480.yml) | -| DarkNet53 | 576x320 | 63.7 | 64.4 | 1310 | 6782 | 31964 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_576x320.pdparams) | [config](./jde/jde_darknet53_30e_576x320.yml) | - -### JDE Results on MOT-16 Test Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :----------------- | :------- | :----: | :----: | :---: | :----: | :---: | :---: | :---: | :---: | -| DarkNet53(paper) | 1088x608 | 64.4 | 55.8 | 1544 | - | - | - | - | - | -| DarkNet53 | 1088x608 | 64.6 | 58.5 | 1864 | 10550 | 52088 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_1088x608.pdparams) | [config](./jde/jde_darknet53_30e_1088x608.yml) | -| DarkNet53(paper) | 864x480 | 62.1 | 56.9 | 1608 | - | - | - | - | - | -| DarkNet53 | 864x480 | 63.2 | 57.7 | 1966 | 10070 | 55081 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_864x480.pdparams) | [config](./jde/jde_darknet53_30e_864x480.yml) | -| DarkNet53 | 576x320 | 59.1 | 56.4 | 1911 | 10923 | 61789 | - |[model](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_576x320.pdparams) | [config](./jde/jde_darknet53_30e_576x320.yml) | - -**Notes:** - JDE used 8 GPUs for training and mini-batch size as 4 on each GPU, and trained for 30 epoches. 
- - -### FairMOT Results on MOT-16 Training Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34(paper) | 1088x608 | 83.3 | 81.9 | 544 | 3822 | 14095 | - | - | - | -| DLA-34 | 1088x608 | 83.2 | 83.1 | 499 | 3861 | 14223 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams) | [config](./fairmot/fairmot_dla34_30e_1088x608.yml) | -| DLA-34 | 864x480 | 80.8 | 81.1 | 561 | 3643 | 16967 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_864x480.pdparams) | [config](./fairmot/fairmot_dla34_30e_864x480.yml) | -| DLA-34 | 576x320 | 74.0 | 76.1 | 640 | 4989 | 23034 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_576x320.pdparams) | [config](./fairmot/fairmot_dla34_30e_576x320.yml) | - - -### FairMOT Results on MOT-16 Test Set - -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34(paper) | 1088x608 | 74.9 | 72.8 | 1074 | - | - | 25.9 | - | - | -| DLA-34 | 1088x608 | 75.0 | 74.7 | 919 | 7934 | 36747 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams) | [config](./fairmot/fairmot_dla34_30e_1088x608.yml) | -| DLA-34 | 864x480 | 73.0 | 72.6 | 977 | 7578 | 40601 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_864x480.pdparams) | [config](./fairmot/fairmot_dla34_30e_864x480.yml) | -| DLA-34 | 576x320 | 69.9 | 70.2 | 1044 | 8869 | 44898 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_576x320.pdparams) | [config](./fairmot/fairmot_dla34_30e_576x320.yml) | - -**Notes:** - FairMOT DLA-34 used 2 GPUs for training and mini-batch size as 6 on each GPU, and trained for 30 epoches. - - -### FairMOT enhance model -### Results on MOT-16 Test Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34 | 1088x608 | 75.9 | 74.7 | 1021 | 11425 | 31475 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_enhance_dla34_30e_1088x608.pdparams) | [config](./fairmot_enhance_dla34_30e_1088x608.yml) | - -### Results on MOT-17 Test Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34 | 1088x608 | 75.3 | 74.2 | 3270 | 29112 | 106749 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_enhance_dla34_30e_1088x608.pdparams) | [config](./fairmot_enhance_dla34_30e_1088x608.yml) | - -**Notes:** - FairMOT enhance DLA-34 used 8 GPUs for training and mini-batch size as 16 on each GPU,and trained for 60 epoches. The crowdhuman dataset is added to the train-set during training. 
- - -### FairMOT light model -### Results on MOT-16 Test Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| HRNetV2-W18 | 1088x608 | 71.7 | 66.6 | 1340 | 8642 | 41592 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.pdparams) | [config](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml) | - -### Results on MOT-17 Test Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| HRNetV2-W18 | 1088x608 | 70.7 | 65.7 | 4281 | 22485 | 138468 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.pdparams) | [config](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml) | -| HRNetV2-W18 | 864x480 | 70.3 | 65.8 | 4056 | 18927 | 144486 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.pdparams) | [config](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml) | -| HRNetV2-W18 | 576x320 | 65.3 | 64.8 | 4137 | 28860 | 163017 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams) | [config](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml) | - -**Notes:** - FairMOT HRNetV2-W18 used 8 GPUs for training and mini-batch size as 4 on each GPU, and trained for 30 epoches. Only ImageNet pre-train model is used, and the optimizer adopts Momentum. The crowdhuman dataset is added to the train-set during training. - +- base models + - [DeepSORT](deepsort/README.md) + - [JDE](jde/README.md) + - [FairMOT](fairmot/README.md) +- feature models + - [Pedestrian](pedestrian/README.md) + - [Head](headtracking21/README.md) + - [Vehicle](vehicle/README.md) +- Multi-Class Tracking + - [MCFairMOT](mcfairmot/README.md) +- Multi-Target Multi-Camera Tracking + - [MTMCT](mtmct/README.md) -## Feature Tracking Model - -### [Head Tracking](./headtracking21/README.md) - -### FairMOT Results on HT-21 Training Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :---: | :----: | :---: | :------: | :----: |:----: | -| DLA-34 | 1088x608 | 64.7 | 69.0 | 8533 | 148817 | 234970 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_headtracking21.pdparams) | [config](./headtracking21/fairmot_dla34_30e_1088x608_headtracking21.yml) | - -### FairMOT Results on HT-21 Test Set -| backbone | input shape | MOTA | IDF1 | IDS | FP | FN | FPS | download | config | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: |:-------: | :----: | :----: | -| DLA-34 | 1088x608 | 60.8 | 62.8 | 12781 | 118109 | 198896 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_headtracking21.pdparams) | [config](./headtracking21/fairmot_dla34_30e_1088x608_headtracking21.yml) | - - -### [Pedestrian Tracking](./pedestrian/README.md) -### FairMOT Results on each val-set of Pedestrian category -| Dataset | input shape | MOTA | IDF1 | FPS | download | config | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| PathTrack | 1088x608 | 44.9 | 59.3 | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_pathtrack.pdparams) | [config](./pedestrian/fairmot_dla34_30e_1088x608_pathtrack.yml) | -| 
VisDrone | 1088x608 | 49.2 | 63.1 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_pedestrian.pdparams) | [config](./pedestrian/fairmot_dla34_30e_1088x608_visdrone_pedestrian.yml) | - - -### [Vehicle Tracking](./vehicle/README.md) -### FairMOT Results on each val-set of Vehicle category -| Dataset | input shape | MOTA | IDF1 | FPS | download | config | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| BDD100K | 1088x608 | 43.5 | 50.0 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_bdd100k_vehicle.pdparams) | [config](./vehicle/fairmot_dla34_30e_1088x608_bdd100k_vehicle.yml) | -| KITTI | 1088x608 | 82.7 | - | - |[model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_kitti_vehicle.pdparams) | [config](./vehicle/fairmot_dla34_30e_1088x608_kitti_vehicle.yml) | -| VisDrone | 1088x608 | 52.1 | 63.3 | - | [model](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_vehicle.pdparams) | [config](./vehicle/fairmot_dla34_30e_1088x608_visdrone_vehicle.yml) | ## Dataset Preparation @@ -273,78 +136,6 @@ dataset/mot |——————PRW ``` -## Getting Start - -### 1. Training - -Training FairMOT on 2 GPUs with following command - -```bash -python -m paddle.distributed.launch --log_dir=./fairmot_dla34_30e_1088x608/ --gpus 0,1 tools/train.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -``` - -### 2. Evaluation - -Evaluating the track performance of FairMOT on val dataset in single GPU with following commands: - -```bash -# use weights released in PaddleDetection model zoo -CUDA_VISIBLE_DEVICES=0 python tools/eval_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams - -# use saved checkpoint in training -CUDA_VISIBLE_DEVICES=0 python tools/eval_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=output/fairmot_dla34_30e_1088x608/model_final.pdparams -``` -**Notes:** - The default evaluation dataset is MOT-16 Train Set. If you want to change the evaluation dataset, please refer to the following code and modify `configs/datasets/mot.yml`, modify `data_root`: -``` -EvalMOTDataset: - !MOTImageFolder - dataset_dir: dataset/mot - data_root: MOT17/images/train - keep_ori_im: False # set True if save visualization images or video -``` - -### 3. Inference - -Inference a vidoe on single GPU with following command: - -```bash -# inference on video and save a video -CUDA_VISIBLE_DEVICES=0 python tools/infer_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams --video_file={your video name}.mp4 --frame_rate=20 --save_videos -``` - -Inference a image folder on single GPU with following command: - -```bash -# inference image folder and save a video -CUDA_VISIBLE_DEVICES=0 python tools/infer_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams --image_dir={your infer images folder} --save_videos -``` - -**Notes:** - Please make sure that [ffmpeg](https://ffmpeg.org/ffmpeg.html) is installed first, on Linux(Ubuntu) platform you can directly install it by the following command:`apt-get update && apt-get install -y ffmpeg`. `--frame_rate` means the frame rate of the video and the frames extracted per second. 
It can be set by yourself, default value is -1 indicating the video frame rate read by OpenCV. - - -### 4. Export model - -```bash -CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams -``` - -### 5. Using exported model for python inference - -```bash -python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts -``` -**Notes:** -The tracking model is used to predict the video, and does not support the prediction of a single image. The visualization video of the tracking results is saved by default. You can add `--save_mot_txts` to save the txt result file, or `--save_images` to save the visualization images. - -### 6. Using exported MOT and keypoint model for unite python inference - -```bash -python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inference/fairmot_dla34_30e_1088x608/ --keypoint_model_dir=output_inference/higherhrnet_hrnet_w32_512/ --video_file={your video name}.mp4 --device=GPU -``` -**Notes:** - Keypoint model export tutorial: `configs/keypoint/README.md`. ## Citations ``` diff --git a/configs/mot/README_cn.md b/configs/mot/README_cn.md index ade9009a6..6addfd905 100644 --- a/configs/mot/README_cn.md +++ b/configs/mot/README_cn.md @@ -6,9 +6,7 @@ - [简介](#简介) - [安装依赖](#安装依赖) - [模型库](#模型库) -- [特色垂类跟踪模型](#特色垂类跟踪模型) - [数据集准备](#数据集准备) -- [快速开始](#快速开始) - [引用](#引用) ## 简介 @@ -26,8 +24,15 @@ PaddleDetection实现了这两个系列的3种多目标跟踪算法。 - [FairMOT](https://arxiv.org/abs/2004.01888)以Anchor Free的CenterNet检测器为基础,克服了Anchor-Based的检测框架中anchor和特征不对齐问题,深浅层特征融合使得检测和ReID任务各自获得所需要的特征,并且使用低维度ReID特征,提出了一种由两个同质分支组成的简单baseline来预测像素级目标得分和ReID特征,实现了两个任务之间的公平性,并获得了更高水平的实时多目标跟踪精度。 -
-
+[PP-Tracking](../../deploy/pptracking/README.md)是基于PaddlePaddle深度学习框架的业界首个开源实时跟踪系统。针对实际业务的难点痛点,PP-Tracking内置行人车辆跟踪、跨镜头跟踪、多类别跟踪、小目标跟踪及流量计数等能力与产业应用,同时提供可视化开发界面。模型集成多目标跟踪、目标检测、ReID轻量级算法,进一步提升PP-Tracking在服务器端的部署性能,同时支持Python、C++部署,适配Linux、NVIDIA Jetson等多平台环境。
+
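For the cross-camera tracking capability mentioned above, a minimal sketch of the Python deployment command (based on the MTMCT instructions added later in this patch) is shown below; the detector/ReID model directories and the scene folder are placeholders.

```bash
# Minimal sketch: cross-camera (MTMCT) vehicle tracking with exported detector and ReID models.
python deploy/pptracking/python/mot_sde_infer.py \
    --model_dir=picodet_l_640_aic21mtmct_vehicle/ \
    --reid_model_dir=deepsort_pplcnet_vehicle/ \
    --mtmct_dir={your mtmct scene video folder} \
    --device=GPU \
    --scaled=True \
    --save_mot_txts \
    --save_images
```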
+ +
+ +
+ +
+ 视频来源:VisDrone2021, BDD100K开源数据集
@@ -45,163 +50,18 @@ pip install -r requirements.txt ## 模型库 -### DeepSORT在MOT-16 Training Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 检测结果或模型 | ReID模型 |配置文件 | -| :---------| :------- | :----: | :----: | :--: | :----: | :---: | :---: | :-----:| :-----: | :-----: | -| ResNet-101 | 1088x608 | 72.2 | 60.5 | 998 | 8054 | 21644 | - | [检测结果](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[配置文件](./deepsort/reid/deepsort_pcb_pyramid_r101.yml) | -| ResNet-101 | 1088x608 | 68.3 | 56.5 | 1722 | 17337 | 15890 | - | [检测模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[配置文件](./deepsort/deepsort_jde_yolov3_pcb_pyramid.yml) | -| PPLCNet | 1088x608 | 72.2 | 59.5 | 1087 | 8034 | 21481 | - | [检测结果](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[配置文件](./deepsort/reid/deepsort_pplcnet.yml) | -| PPLCNet | 1088x608 | 68.1 | 53.6 | 1979 | 17446 | 15766 | - | [检测模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[配置文件](./deepsort/deepsort_jde_yolov3_pplcnet.yml) | - -### DeepSORT在MOT-16 Test Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 检测结果或模型 | ReID模型 |配置文件 | -| :---------| :------- | :----: | :----: | :--: | :----: | :---: | :---: | :-----: | :-----: |:-----: | -| ResNet-101 | 1088x608 | 64.1 | 53.0 | 1024 | 12457 | 51919 | - | [检测结果](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) | [ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[配置文件](./deepsort/reid/deepsort_pcb_pyramid_r101.yml) | -| ResNet-101 | 1088x608 | 61.2 | 48.5 | 1799 | 25796 | 43232 | - | [检测模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams)|[配置文件](./deepsort/deepsort_jde_yolov3_pcb_pyramid.yml) | -| PPLCNet | 1088x608 | 64.0 | 51.3 | 1208 | 12697 | 51784 | - | [检测结果](https://dataset.bj.bcebos.com/mot/det_results_dir.zip) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[配置文件](./deepsort/reid/deepsort_pplcnet.yml) | -| PPLCNet | 1088x608 | 61.1 | 48.8 | 2010 | 25401 | 43432 | - | [检测模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams) |[ReID模型](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams)|[配置文件](./deepsort/deepsort_jde_yolov3_pplcnet.yml) | - -**注意:** -DeepSORT不需要训练MOT数据集,只用于评估,现在支持两种评估的方式。 -- **方式1**:加载检测结果文件和ReID模型,在使用DeepSORT模型评估之前,应该首先通过一个检测模型得到检测结果,然后像这样准备好结果文件: -``` -det_results_dir - |——————MOT16-02.txt - |——————MOT16-04.txt - |——————MOT16-05.txt - |——————MOT16-09.txt - |——————MOT16-10.txt - |——————MOT16-11.txt - |——————MOT16-13.txt -``` -对于MOT16数据集,可以下载PaddleDetection提供的一个经过匹配之后的检测框结果det_results_dir.zip并解压: -``` -wget https://dataset.bj.bcebos.com/mot/det_results_dir.zip -``` -如果使用更强的检测模型,可以取得更好的结果。其中每个txt是每个视频中所有图片的检测结果,每行都描述一个边界框,格式如下: -``` -[frame_id],[x0],[y0],[w],[h],[score],[class_id] -``` -- `frame_id`是图片帧的序号 -- `x0,y0`是目标框的左上角x和y坐标 -- `w,h`是目标框的像素宽高 -- `score`是目标框的得分 -- 
`class_id`是目标框的类别,如果只有1类则是`0` - -- **方式2**:同时加载检测模型和ReID模型,此处选用JDE版本的YOLOv3,具体配置见`configs/mot/deepsort/deepsort_jde_yolov3_pcb_pyramid.yml`。加载其他通用检测模型可参照`configs/mot/deepsort/deepsort_ppyolov2_pplcnet.yml`进行修改。 - - -### JDE在MOT-16 Training Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :----------------- | :------- | :----: | :----: | :---: | :----: | :---: | :---: | :---: | :---: | -| DarkNet53 | 1088x608 | 72.0 | 66.9 | 1397 | 7274 | 22209 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_1088x608.pdparams) | [配置文件](./jde/jde_darknet53_30e_1088x608.yml) | -| DarkNet53 | 864x480 | 69.1 | 64.7 | 1539 | 7544 | 25046 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_864x480.pdparams) | [配置文件](./jde/jde_darknet53_30e_864x480.yml) | -| DarkNet53 | 576x320 | 63.7 | 64.4 | 1310 | 6782 | 31964 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_576x320.pdparams) | [配置文件](./jde/jde_darknet53_30e_576x320.yml) | - - -### JDE在MOT-16 Test Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :----------------- | :------- | :----: | :----: | :---: | :----: | :---: | :---: | :---: | :---: | -| DarkNet53(paper) | 1088x608 | 64.4 | 55.8 | 1544 | - | - | - | - | - | -| DarkNet53 | 1088x608 | 64.6 | 58.5 | 1864 | 10550 | 52088 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_1088x608.pdparams) | [配置文件](./jde/jde_darknet53_30e_1088x608.yml) | -| DarkNet53(paper) | 864x480 | 62.1 | 56.9 | 1608 | - | - | - | - | - | -| DarkNet53 | 864x480 | 63.2 | 57.7 | 1966 | 10070 | 55081 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_864x480.pdparams) | [配置文件](./jde/jde_darknet53_30e_864x480.yml) | -| DarkNet53 | 576x320 | 59.1 | 56.4 | 1911 | 10923 | 61789 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/jde_darknet53_30e_576x320.pdparams) | [配置文件](./jde/jde_darknet53_30e_576x320.yml) | - -**注意:** - JDE使用8个GPU进行训练,每个GPU上batch size为4,训练了30个epoch。 - - -### FairMOT在MOT-16 Training Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :---: | :----: | :---: | :------: | :----: |:----: | -| DLA-34(paper) | 1088x608 | 83.3 | 81.9 | 544 | 3822 | 14095 | - | - | - | -| DLA-34 | 1088x608 | 83.2 | 83.1 | 499 | 3861 | 14223 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams) | [配置文件](./fairmot/fairmot_dla34_30e_1088x608.yml) | -| DLA-34 | 864x480 | 80.8 | 81.1 | 561 | 3643 | 16967 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_864x480.pdparams) | [配置文件](./fairmot/fairmot_dla34_30e_864x480.yml) | -| DLA-34 | 576x320 | 74.0 | 76.1 | 640 | 4989 | 23034 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_576x320.pdparams) | [配置文件](./fairmot/fairmot_dla34_30e_576x320.yml) | - -### FairMOT在MOT-16 Test Set上结果 - -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: |:-------: | :----: | :----: | -| DLA-34(paper) | 1088x608 | 74.9 | 72.8 | 1074 | - | - | 25.9 | - | - | -| DLA-34 | 1088x608 | 75.0 | 74.7 | 919 | 7934 | 36747 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams) | [配置文件](./fairmot/fairmot_dla34_30e_1088x608.yml) | -| DLA-34 | 864x480 | 73.0 | 72.6 | 977 | 7578 | 40601 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_864x480.pdparams) | 
[配置文件](./fairmot/fairmot_dla34_30e_864x480.yml) | -| DLA-34 | 576x320 | 69.9 | 70.2 | 1044 | 8869 | 44898 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_576x320.pdparams) | [配置文件](./fairmot/fairmot_dla34_30e_576x320.yml) | - -**注意:** - FairMOT DLA-34均使用2个GPU进行训练,每个GPU上batch size为6,训练30个epoch。 - - -### FairMOT enhance模型 -### 在MOT-16 Test Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34 | 1088x608 | 75.9 | 74.7 | 1021 | 11425 | 31475 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_enhance_dla34_60e_1088x608.pdparams) | [配置文件](./fairmot_enhance_dla34_60e_1088x608.yml) | - -### 在MOT-17 Test Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| DLA-34 | 1088x608 | 75.3 | 74.2 | 3270 | 29112 | 106749 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_enhance_dla34_60e_1088x608.pdparams) | [配置文件](./fairmot_enhance_dla34_60e_1088x608.yml) | - -**注意:** - FairMOT enhance DLA-34使用8个GPU进行训练,每个GPU上batch size为16,训练60个epoch,并且训练集中加入了crowdhuman数据集一起参与训练。 - - -### FairMOT轻量级模型 -### 在MOT-16 Test Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| HRNetV2-W18 | 1088x608 | 71.7 | 66.6 | 1340 | 8642 | 41592 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.pdparams) | [配置文件](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml) | - -### 在MOT-17 Test Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: | :------: | :----: |:-----: | -| HRNetV2-W18 | 1088x608 | 70.7 | 65.7 | 4281 | 22485 | 138468 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.pdparams) | [配置文件](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml) | -| HRNetV2-W18 | 864x480 | 70.3 | 65.8 | 4056 | 18927 | 144486 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.pdparams) | [配置文件](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml) | -| HRNetV2-W18 | 576x320 | 65.3 | 64.8 | 4137 | 28860 | 163017 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams) | [配置文件](./fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml) | - -**注意:** - FairMOT HRNetV2-W18均使用8个GPU进行训练,每个GPU上batch size为4,训练30个epoch,使用的ImageNet预训练,优化器策略采用的是Momentum,并且训练集中加入了crowdhuman数据集一起参与训练。 - - -## 特色垂类跟踪模型 - -### [人头跟踪(Head Tracking)](./headtracking21/README.md) -### FairMOT在HT-21 Training Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :---: | :----: | :---: | :------: | :----: |:----: | -| DLA-34 | 1088x608 | 64.7 | 69.0 | 8533 | 148817 | 234970 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_headtracking21.pdparams) | [配置文件](./headtracking21/fairmot_dla34_30e_1088x608_headtracking21.yml) | - -### FairMOT在HT-21 Test Set上结果 -| 骨干网络 | 输入尺寸 | MOTA | IDF1 | IDS | FP | FN | FPS | 下载链接 | 配置文件 | -| :--------------| :------- | :----: | :----: | :----: | :----: | :----: |:-------: | :----: | :----: | -| DLA-34 | 1088x608 | 60.8 | 62.8 | 12781 | 118109 | 198896 | - | 
[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_headtracking21.pdparams) | [配置文件](./headtracking21/fairmot_dla34_30e_1088x608_headtracking21.yml) | - - -### [行人跟踪 (Pedestrian Tracking)](./pedestrian/README.md) -### FairMOT在各个数据集val-set上Pedestrian类别的结果 -| 数据集 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| PathTrack | 1088x608 | 44.9 | 59.3 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_pathtrack.pdparams) | [配置文件](./pedestrian/fairmot_dla34_30e_1088x608_pathtrack.yml) | -| VisDrone | 1088x608 | 49.2 | 63.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_pedestrian.pdparams) | [配置文件](./pedestrian/fairmot_dla34_30e_1088x608_visdrone_pedestrian.yml) | - - -### [车辆跟踪 (Vehicle Tracking)](./vehicle/README.md) -### FairMOT在各个数据集val-set上Vehicle类别的结果 - -| 数据集 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| BDD100K | 1088x608 | 43.5 | 50.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.pdparams) | [配置文件](./vehicle/fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.yml) | -| KITTI | 1088x608 | 82.7 | - | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_kitti_vehicle.pdparams) | [配置文件](./vehicle/fairmot_dla34_30e_1088x608_kitti_vehicle.yml) | -| VisDrone | 1088x608 | 52.1 | 63.3 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_vehicle.pdparams) | [配置文件](./vehicle/fairmot_dla34_30e_1088x608_visdrone_vehicle.yml) | - - +- 基础模型 + - [DeepSORT](deepsort/README_cn.md) + - [JDE](jde/README_cn.md) + - [FairMOT](fairmot/README_cn.md) +- 特色垂类模型 + - [行人跟踪](pedestrian/README_cn.md) + - [人头跟踪](headtracking21/README_cn.md) + - [车辆跟踪](vehicle/README_cn.md) +- 多类别跟踪 + - [多类别跟踪](mcfairmot/README_cn.md) +- 跨境头跟踪 + - [跨境头跟踪](mtmct/README_cn.md) ## 数据集准备 @@ -272,77 +132,6 @@ dataset/mot |——————PRW ``` -## 快速开始 - -### 1. 训练 - -FairMOT使用2个GPU通过如下命令一键式启动训练 - -```bash -python -m paddle.distributed.launch --log_dir=./fairmot_dla34_30e_1088x608/ --gpus 0,1 tools/train.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -``` - -### 2. 评估 - -FairMOT使用单张GPU通过如下命令一键式启动评估 - -```bash -# 使用PaddleDetection发布的权重 -CUDA_VISIBLE_DEVICES=0 python tools/eval_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams - -# 使用训练保存的checkpoint -CUDA_VISIBLE_DEVICES=0 python tools/eval_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=output/fairmot_dla34_30e_1088x608/model_final.pdparams -``` -**注意:** - 默认评估的是MOT-16 Train Set数据集,如需换评估数据集可参照以下代码修改`configs/datasets/mot.yml`,修改`data_root`: -``` -EvalMOTDataset: - !MOTImageFolder - dataset_dir: dataset/mot - data_root: MOT17/images/train - keep_ori_im: False # set True if save visualization images or video -``` - -### 3. 
预测 - -使用单个GPU通过如下命令预测一个视频,并保存为视频 - -```bash -# 预测一个视频 -CUDA_VISIBLE_DEVICES=0 python tools/infer_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams --video_file={your video name}.mp4 --frame_rate=20 --save_videos -``` - -使用单个GPU通过如下命令预测一个图片文件夹,并保存为视频 - -```bash -# 预测一个图片文件夹 -CUDA_VISIBLE_DEVICES=0 python tools/infer_mot.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams --image_dir={your infer images folder} --save_videos -``` - -**注意:** - 请先确保已经安装了[ffmpeg](https://ffmpeg.org/ffmpeg.html), Linux(Ubuntu)平台可以直接用以下命令安装:`apt-get update && apt-get install -y ffmpeg`。`--frame_rate`表示视频的帧率,表示每秒抽取多少帧,可以自行设置,默认为-1表示会使用OpenCV读取的视频帧率。 - -### 4. 导出预测模型 - -```bash -CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608.pdparams -``` - -### 5. 用导出的模型基于Python去预测 - -```bash -python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts -``` -**注意:** - 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。 - -### 6. 用导出的跟踪和关键点模型Python联合预测 - -```bash -python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inference/fairmot_dla34_30e_1088x608/ --keypoint_model_dir=output_inference/higherhrnet_hrnet_w32_512/ --video_file={your video name}.mp4 --device=GPU -``` -**注意:** - 关键点模型导出教程请参考`configs/keypoint/README.md`。 ## 引用 ``` diff --git a/configs/mot/deepsort/README_cn.md b/configs/mot/deepsort/README_cn.md index 98ef29d27..32a324449 100644 --- a/configs/mot/deepsort/README_cn.md +++ b/configs/mot/deepsort/README_cn.md @@ -120,7 +120,7 @@ Step 1:导出检测模型 CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/jde_yolov3_darknet53_30e_1088x608_mix.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams # 或导出PPYOLOv2行人检测模型 -CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams ``` Step 2:导出ReID模型 diff --git a/configs/mot/fairmot/README.md b/configs/mot/fairmot/README.md index 8a077861e..b28563fef 100644 --- a/configs/mot/fairmot/README.md +++ b/configs/mot/fairmot/README.md @@ -128,6 +128,15 @@ python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34 The tracking model is used to predict the video, and does not support the prediction of a single image. The visualization video of the tracking results is saved by default. You can add `--save_mot_txts` to save the txt result file, or `--save_images` to save the visualization images. +### 6. 
Using exported MOT and keypoint model for unite python inference + +```bash +python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inference/fairmot_dla34_30e_1088x608/ --keypoint_model_dir=output_inference/higherhrnet_hrnet_w32_512/ --video_file={your video name}.mp4 --device=GPU +``` +**Notes:** + Keypoint model export tutorial: `configs/keypoint/README.md`. + + ## Citations ``` @article{zhang2020fair, diff --git a/configs/mot/fairmot/README_cn.md b/configs/mot/fairmot/README_cn.md index 7948832d5..e411256de 100644 --- a/configs/mot/fairmot/README_cn.md +++ b/configs/mot/fairmot/README_cn.md @@ -125,6 +125,15 @@ python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34 **注意:** 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。 +### 6. 用导出的跟踪和关键点模型Python联合预测 + +```bash +python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inference/fairmot_dla34_30e_1088x608/ --keypoint_model_dir=output_inference/higherhrnet_hrnet_w32_512/ --video_file={your video name}.mp4 --device=GPU +``` +**注意:** + 关键点模型导出教程请参考`configs/keypoint/README.md`。 + + ## 引用 ``` @article{zhang2020fair, diff --git a/configs/mot/fairmot/_base_/optimizer_30e_momentum.yml b/configs/mot/fairmot/_base_/optimizer_30e_momentum.yml index eec339309..506b5de7e 100644 --- a/configs/mot/fairmot/_base_/optimizer_30e_momentum.yml +++ b/configs/mot/fairmot/_base_/optimizer_30e_momentum.yml @@ -12,7 +12,6 @@ LearningRate: OptimizerBuilder: optimizer: - momentum: 0.9 type: Momentum regularizer: factor: 0.0001 diff --git a/configs/mot/fairmot/fairmot_enhance_dla34_60e_1088x608.yml b/configs/mot/fairmot/fairmot_enhance_dla34_60e_1088x608.yml index 0d8ea6afd..c404468e3 100644 --- a/configs/mot/fairmot/fairmot_enhance_dla34_60e_1088x608.yml +++ b/configs/mot/fairmot/fairmot_enhance_dla34_60e_1088x608.yml @@ -9,6 +9,13 @@ norm_type: sync_bn use_ema: true ema_decay: 0.9998 +# add crowdhuman +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train', 'crowdhuman.train', 'crowdhuman.val'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + worker_num: 4 TrainReader: inputs_def: diff --git a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml index dc9178db1..bd0645fdf 100644 --- a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml +++ b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml @@ -10,7 +10,7 @@ norm_type: sync_bn use_ema: true ema_decay: 0.9998 -# for MOT training +# add crowdhuman TrainDataset: !MOTDataSet dataset_dir: dataset/mot diff --git a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml index a480ebd04..bc35d346e 100644 --- a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml +++ b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml @@ -10,7 +10,7 @@ norm_type: sync_bn use_ema: true ema_decay: 0.9998 -# for MOT training +# add crowdhuman TrainDataset: !MOTDataSet dataset_dir: dataset/mot diff --git a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml index 25f1b636a..061734a48 100644 --- a/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml +++ b/configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml @@ -10,7 +10,7 
@@ norm_type: sync_bn use_ema: true ema_decay: 0.9998 -# for MOT training +# add crowdhuman TrainDataset: !MOTDataSet dataset_dir: dataset/mot diff --git a/configs/mot/mcfairmot/README.md b/configs/mot/mcfairmot/README.md index 573d64ac2..453620b39 100644 --- a/configs/mot/mcfairmot/README.md +++ b/configs/mot/mcfairmot/README.md @@ -18,6 +18,8 @@ MCFairMOT is the Multi-class extended version of [FairMOT](https://arxiv.org/abs | :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: | | DLA-34 | 1088x608 | 24.3 | 41.6 | 2314 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone.pdparams) | [config](./mcfairmot_dla34_30e_1088x608_visdrone.yml) | | HRNetV2-W18 | 1088x608 | 20.4 | 39.9 | 2603 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.yml) | +| HRNetV2-W18 | 864x480 | 18.2 | 38.7 | 2416 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml) | +| HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[model](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [config](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **Notes:** MOTA is the average MOTA of 10 catecories in the VisDrone2019 MOT dataset, and its value is also equal to the average MOTA of all the evaluated video sequences. diff --git a/configs/mot/mcfairmot/README_cn.md b/configs/mot/mcfairmot/README_cn.md index a44442fc9..d157cd332 100644 --- a/configs/mot/mcfairmot/README_cn.md +++ b/configs/mot/mcfairmot/README_cn.md @@ -19,6 +19,8 @@ MCFairMOT是[FairMOT](https://arxiv.org/abs/2004.01888)的多类别扩展版本 | :--------------| :------- | :----: | :----: | :---: | :------: | :----: |:----: | | DLA-34 | 1088x608 | 24.3 | 41.6 | 2314 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_dla34_30e_1088x608_visdrone.pdparams) | [配置文件](./mcfairmot_dla34_30e_1088x608_visdrone.yml) | | HRNetV2-W18 | 1088x608 | 20.4 | 39.9 | 2603 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.yml) | +| HRNetV2-W18 | 864x480 | 18.2 | 38.7 | 2416 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml) | +| HRNetV2-W18 | 576x320 | 12.0 | 33.8 | 2178 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.pdparams) | [配置文件](./mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml) | **注意:** MOTA是VisDrone2019 MOT数据集10类目标的平均MOTA, 其值也等于所有评估的视频序列的平均MOTA。 diff --git a/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml new file mode 100644 index 000000000..e0fe18381 --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone.yml @@ -0,0 +1,47 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml', + '../../datasets/mcmot.yml' +] + +architecture: FairMOT +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams +for_mot: True + +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: 
FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine + +weights: output/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone/model_final + +epoch: 30 +LearningRate: + base_lr: 0.0005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [10, 20] + use_warmup: False + +OptimizerBuilder: + optimizer: + type: Adam + regularizer: NULL + +TrainReader: + batch_size: 8 diff --git a/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml new file mode 100644 index 000000000..02d918dde --- /dev/null +++ b/configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone.yml @@ -0,0 +1,47 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml', + '../../datasets/mcmot.yml' +] + +architecture: FairMOT +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams +for_mot: True + +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker # multi-class tracker + +CenterNetHead: + regress_ltrb: False + +CenterNetPostProcess: + regress_ltrb: False + max_per_img: 200 + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine + +weights: output/mcfairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone/model_final + +epoch: 30 +LearningRate: + base_lr: 0.0005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [10, 20] + use_warmup: False + +OptimizerBuilder: + optimizer: + type: Adam + regularizer: NULL + +TrainReader: + batch_size: 8 diff --git a/configs/mot/mtmct/README.md b/configs/mot/mtmct/README.md new file mode 120000 index 000000000..4015683cf --- /dev/null +++ b/configs/mot/mtmct/README.md @@ -0,0 +1 @@ +README_cn.md \ No newline at end of file diff --git a/configs/mot/mtmct/README_cn.md b/configs/mot/mtmct/README_cn.md new file mode 100644 index 000000000..89ef2e8c7 --- /dev/null +++ b/configs/mot/mtmct/README_cn.md @@ -0,0 +1,113 @@ +English | [简体中文](README_cn.md) + +# MTMCT (Multi-Target Multi-Camera Tracking) + +## 内容 +- [简介](#简介) +- [模型库](#模型库) +- [快速开始](#快速开始) +- [引用](#引用) + +## 简介 +MTMCT (Multi-Target Multi-Camera Tracking) 跨镜头多目标跟踪是某一场景下的不同摄像头拍摄的视频进行多目标跟踪,是跟踪领域一个非常重要的研究课题,在安防监控、自动驾驶、智慧城市等行业起着重要作用。MTMCT预测的是同一场景下的不同摄像头拍摄的视频,其方法的效果受场景先验知识和相机数量角度拓扑结构等信息的影响较大,PaddleDetection此处提供的是去除场景和相机相关优化方法后的一个基础版本的MTMCT算法实现,如果要继续提高效果,需要专门针对该场景和相机信息设计后处理算法。此处选用DeepSORT方案做MTMCT,为了达到实时性选用了PaddleDetection自研的PPYOLOv2和PP-PicoDet作为检测器,选用PaddleClas自研的轻量级网络PP-LCNet作为ReID模型。 + +MTMCT是[PP-Tracking](../../../deploy/pptracking)项目中一个非常重要的方向,[PP-Tracking](../../../deploy/pptracking/README.md)是基于PaddlePaddle深度学习框架的业界首个开源实时跟踪系统。针对实际业务的难点痛点,PP-Tracking内置行人车辆跟踪、跨镜头跟踪、多类别跟踪、小目标跟踪及流量计数等能力与产业应用,同时提供可视化开发界面。模型集成多目标跟踪,目标检测,ReID轻量级算法,进一步提升PP-Tracking在服务器端部署性能。同时支持python,C++部署,适配Linux,Nvidia Jetson多平台环境。具体可前往该目录使用。 + + +## 模型库 +### DeepSORT在 AIC21 MTMCT(CityFlow) 车辆跨境跟踪数据集Test集上的结果 + +| 检测器 | 输入尺度 | ReID | 场景 | Tricks | IDF1 | IDP | IDR | Precision | Recall | FPS | 检测器下载链接 | ReID下载链接 | +| :--------- | :--------- | :------- | :----- | :------ |:----- |:------- |:----- |:--------- |:-------- |:----- |:------ | :------ | +| PP-PicoDet | 640x640 | PP-LCNet | S06 | - | 
0.3617 | 0.4417 | 0.3062 | 0.6266 | 0.4343 | - |[Detector](https://paddledet.bj.bcebos.com/models/mot/deepsort/picodet_l_640_aic21mtmct_vehicle.tar) |[ReID](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar) | +| PPYOLOv2 | 640x640 | PP-LCNet | S06 | - | 0.4450 | 0.4611 | 0.4300 | 0.6385 | 0.5954 | - |[Detector](https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle.tar) |[ReID](https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar) | + +**注意:** + S06是AIC21 MTMCT数据集Test集的场景名称,S06场景下有’c041,c042,c043,c044,c045,c046‘共6个摄像头的视频。 + + +## 数据集准备 +此处提供了车辆和行人的两种模型方案,对于车辆是选用的[AIC21 MTMCT](https://www.aicitychallenge.org) (CityFlow)车辆跨境跟踪数据集,对于行人是选用的[WILDTRACK](https://www.epfl.ch/labs/cvlab/data/data-wildtrack)行人跨境跟踪数据集。 +AIC21 MTMCT原始数据集的目录如下所示: +``` +|——————AIC21_Track3_MTMC_Tracking + |——————cam_framenum (Number of frames below each camera) + |——————cam_loc (Positional relationship between cameras) + |——————cam_timestamp (Time difference between cameras) + |——————eval (evaluation function and ground_truth.txt) + |——————test + |——————train + |——————validation + |——————DataLicenseAgreement_AICityChallenge_2021.pdf + |——————list_cam.txt (List of all camera paths) + |——————ReadMe.txt (Dataset description) +|——————gen_aicity_mtmct_data.py (Camera data extraction script) +``` +需要处理成如下格式: +``` +├── S01 +│ ├── c001 +│ ├── roi.jog (Area mask of the road) +│ ├── img1 +│ ├── ... +│ ├── c002 +│ ├── roi.jog +│ ├── img1 +│ ├── ... +│ ├── c003 +│ ├── roi.jog +│ ├── img1 +│ ├── ... +├── gt +│ ├── ground_truth_train.txt +│ ├── ground_truth_validation.txt +├── zone (only for S06 when use camera track trick) +│ ├── ... +``` + +#### 生成S01场景的验证集数据 +python gen_aicity_mtmct_data.py ./AIC21_Track3_MTMC_Tracking/train/S01 + + +## 快速开始 + +### 1. 导出模型 +Step 1:下载导出的检测模型 +```bash +wget https://paddledet.bj.bcebos.com/models/mot/deepsort/picodet_l_640_aic21mtmct_vehicle.tar +tar -xvf picodet_l_640_aic21mtmct_vehicle.tar +``` +Step 2:下载导出的ReID模型 +```bash +wget https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar +tar -xvf deepsort_pplcnet_vehicle.tar +``` +**注意:** + PP-PicoDet是轻量级检测模型,其训练请参考[configs/picodet](../../picodet/README.md),并注意修改种类数和数据集路径。 + PP-LCNet是轻量级ReID模型,其训练请参考[PaddleClas](https://github.com/PaddlePaddle/PaddleClas),是在VERI-Wild车辆重识别数据集训练得到的权重,建议直接使用无需重训。 + + +### 2. 
用导出的模型基于Python去预测 +```bash +# 用导出PicoDet车辆检测模型和PPLCNet车辆ReID模型 +python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir={your mtmct scene video folder} --device=GPU --scaled=True --save_mot_txts --save_images +``` +**注意:** + 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`(对每个视频保存一个txt),或`--save_images`表示保存跟踪结果可视化图片。 + `--scaled`表示在模型输出结果的坐标是否已经是缩放回原图的,如果使用的检测模型是JDE的YOLOv3则为False,如果使用通用检测模型则为True。 + `--mtmct_dir`是MTMCT预测的某个场景的文件夹名字,里面包含该场景不同摄像头拍摄视频的图片文件夹,其数量至少为两个。 + MTMCT跨镜头跟踪输出结果为视频和txt形式。每个图片文件夹各生成一个可视化的跨镜头跟踪结果,与单镜头跟踪的结果是不同的,单镜头跟踪的结果在几个视频文件夹间是独立无关的。MTMCT的结果txt只有一个,比单镜头跟踪结果txt多了第一列镜头id号。 + MTMCT是[PP-Tracking](../../../deploy/pptracking)项目中的一个非常重要的方向,具体可前往该目录使用。 + + +## 引用 +``` +@InProceedings{Tang19CityFlow, +author = {Zheng Tang and Milind Naphade and Ming-Yu Liu and Xiaodong Yang and Stan Birchfield and Shuo Wang and Ratnesh Kumar and David Anastasiu and Jenq-Neng Hwang}, +title = {CityFlow: A City-Scale Benchmark for Multi-Target Multi-Camera Vehicle Tracking and Re-Identification}, +booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2019}, +pages = {8797–8806} +} +``` diff --git a/configs/mot/pedestrian/README_cn.md b/configs/mot/pedestrian/README_cn.md index 2be86665a..67865fd68 100644 --- a/configs/mot/pedestrian/README_cn.md +++ b/configs/mot/pedestrian/README_cn.md @@ -14,10 +14,13 @@ ### FairMOT在各个数据集val-set上Pedestrian类别的结果 -| 数据集 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| PathTrack | 1088x608 | 44.9 | 59.3 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_pathtrack.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_pathtrack.yml) | -| VisDrone | 1088x608 | 49.2 | 63.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_pedestrian.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_visdrone_pedestrian.yml) | +| 数据集 | 骨干网络 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | +| :-------------| :-------- | :------- | :----: | :----: | :----: | :-----: |:------: | +| PathTrack | DLA-34 | 1088x608 | 44.9 | 59.3 | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_pathtrack.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_pathtrack.yml) | +| VisDrone | DLA-34 | 1088x608 | 49.2 | 63.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_pedestrian.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_visdrone_pedestrian.yml) | +| VisDrone | HRNetv2-W18| 1088x608 | 40.5 | 54.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml) | +| VisDrone | HRNetv2-W18| 864x480 | 38.6 | 50.9 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml) | +| VisDrone | HRNetv2-W18| 576x320 | 30.6 | 47.2 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.yml) | **注意:** FairMOT均使用DLA-34为骨干网络,4个GPU进行训练,每个GPU上batch size为6,训练30个epoch。 diff --git 
a/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml new file mode 100755 index 000000000..aca526dc3 --- /dev/null +++ b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml @@ -0,0 +1,26 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_pedestrian.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_pedestrian/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video diff --git a/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.yml b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.yml new file mode 100755 index 000000000..1daab8fdc --- /dev/null +++ b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian.yml @@ -0,0 +1,26 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_pedestrian/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_pedestrian.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_pedestrian/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video diff --git a/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml new file mode 100755 index 000000000..8fd056351 --- /dev/null +++ b/configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian.yml @@ -0,0 +1,26 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_pedestrian/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_pedestrian.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_pedestrian/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video diff --git a/configs/mot/vehicle/README_cn.md 
b/configs/mot/vehicle/README_cn.md index deaa014b9..362fd5cbb 100644 --- a/configs/mot/vehicle/README_cn.md +++ b/configs/mot/vehicle/README_cn.md @@ -17,11 +17,15 @@ ### FairMOT在各个数据集val-set上Vehicle类别的结果 -| 数据集 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | -| :-------------| :------- | :----: | :----: | :----: | :-----: |:------: | -| BDD100K | 1088x608 | 43.5 | 50.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.yml) | -| KITTI | 1088x608 | 82.7 | - | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_kitti_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_kitti_vehicle.yml) | -| VisDrone | 1088x608 | 52.1 | 63.3 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_visdrone_vehicle.yml) | +| 数据集 | 骨干网络 | 输入尺寸 | MOTA | IDF1 | FPS | 下载链接 | 配置文件 | +| :-------------| :-------- | :------- | :----: | :----: | :----: | :-----: |:------: | +| BDD100K | DLA-34 | 1088x608 | 43.5 | 50.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_bdd100kmot_vehicle.yml) | +| BDD100K | HRNetv2-W18| 576x320 | 32.6 | 38.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml) | +| KITTI | DLA-34 | 1088x608 | 82.7 | - | - |[下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_kitti_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_kitti_vehicle.yml) | +| VisDrone | DLA-34 | 1088x608 | 52.1 | 63.3 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_dla34_30e_1088x608_visdrone_vehicle.pdparams) | [配置文件](./fairmot_dla34_30e_1088x608_visdrone_vehicle.yml) | +| VisDrone | HRNetv2-W18| 1088x608 | 46.0 | 56.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.yml) | +| VisDrone | HRNetv2-W18| 864x480 | 43.7 | 56.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.yml) | +| VisDrone | HRNetv2-W18| 576x320 | 39.8 | 52.4 | - | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.pdparams) | [配置文件](./fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml) | **注意:** FairMOT均使用DLA-34为骨干网络,4个GPU进行训练,每个GPU上batch size为6,训练30个epoch。 diff --git a/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.yml b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.yml new file mode 100755 index 000000000..63e79b542 --- /dev/null +++ b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle.yml @@ -0,0 +1,40 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_vehicle/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, 
please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + +# model config +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine diff --git a/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml new file mode 100755 index 000000000..599536ff6 --- /dev/null +++ b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml @@ -0,0 +1,40 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['bdd100kmot_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: bdd100kmot_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + +# model config +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine diff --git a/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml new file mode 100755 index 000000000..7a155f110 --- /dev/null +++ b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml @@ -0,0 +1,40 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + +# model config +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine diff --git 
a/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.yml b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.yml new file mode 100755 index 000000000..8dbbce557 --- /dev/null +++ b/configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle.yml @@ -0,0 +1,40 @@ +_BASE_: [ + '../fairmot/fairmot_hrnetv2_w18_dlafpn_30e_864x480.yml' +] + +weights: output/fairmot_hrnetv2_w18_dlafpn_30e_864x480_visdrone_vehicle/model_final + +# for MOT training +TrainDataset: + !MOTDataSet + dataset_dir: dataset/mot + image_lists: ['visdrone_vehicle.train'] + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide'] + +# for MOT evaluation +# If you want to change the MOT evaluation dataset, please modify 'data_root' +EvalMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + data_root: visdrone_vehicle/images/val + keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT + +# for MOT video inference +TestMOTDataset: + !MOTImageFolder + dataset_dir: dataset/mot + keep_ori_im: True # set True if save visualization images or video + +# model config +FairMOT: + detector: CenterNet + reid: FairMOTEmbeddingHead + loss: FairMOTLoss + tracker: JDETracker + +JDETracker: + min_box_area: 0 + vertical_ratio: 0 # no need to filter bboxes according to w/h + conf_thres: 0.4 + tracked_thresh: 0.4 + metric_type: cosine diff --git a/deploy/python/infer.py b/deploy/python/infer.py index bc268c565..752a07efa 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -537,7 +537,7 @@ def load_predictor(model_dir, } if run_mode in precision_map.keys(): config.enable_tensorrt_engine( - workspace_size=1 << 10, + workspace_size=1 << 25, max_batch_size=batch_size, min_subgraph_size=min_subgraph_size, precision_mode=precision_map[run_mode], @@ -680,7 +680,7 @@ def predict_video(detector, camera_id): if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_out_name) - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) index = 1 while (1): diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index 22ab467dc..c7006a7cd 100644 --- a/deploy/python/mot_jde_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -23,7 +23,6 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from preprocess import preprocess from utils import argsparser, Timer, get_current_memory_mb from infer import Detector, get_test_images, print_arguments, PredictConfig from benchmark_utils import PaddleInferBenchmark @@ -167,6 +166,8 @@ def predict_image(detector, image_list): results = [] num_classes = detector.num_classes data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + image_list.sort() for frame_id, img_file in enumerate(image_list): frame = cv2.imread(img_file) @@ -181,7 +182,8 @@ def predict_image(detector, image_list): online_tlwhs, online_scores, online_ids = detector.predict( [frame], FLAGS.threshold) online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, - online_ids, online_scores, frame_id) + online_ids, online_scores, frame_id, + ids2names=ids2names) if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) @@ -216,6 +218,8 @@ def predict_video(detector, camera_id): results = defaultdict(list) # support single class and multi classes 
num_classes = detector.num_classes data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + while (1): ret, frame = capture.read() if not ret: @@ -237,7 +241,8 @@ def predict_video(detector, camera_id): online_ids, online_scores, frame_id=frame_id, - fps=fps) + fps=fps, + ids2names=ids2names) if FLAGS.save_images: save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) if not os.path.exists(save_dir): diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index 5d00c4f26..a6af02065 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -23,9 +23,9 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from preprocess import preprocess +from picodet_postprocess import PicoDetPostProcess from utils import argsparser, Timer, get_current_memory_mb -from infer import Detector, get_test_images, print_arguments, PredictConfig +from infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig from infer import load_predictor from benchmark_utils import PaddleInferBenchmark @@ -139,6 +139,7 @@ class SDE_Detector(Detector): cpu_threads=cpu_threads, enable_mkldnn=enable_mkldnn) assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + self.pred_config = pred_config def postprocess(self, boxes, input_shape, im_shape, scale_factor, threshold, scaled): @@ -147,6 +148,8 @@ class SDE_Detector(Detector): pred_dets = np.zeros((1, 6), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32) return pred_dets, pred_xyxys + else: + boxes = boxes[over_thres_idx] if not scaled: # scaled means whether the coords after detector outputs @@ -159,6 +162,11 @@ class SDE_Detector(Detector): pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape, scale_factor) + if len(keep_idx[0]) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + pred_scores = boxes[:, 1:2][keep_idx[0]] pred_cls_ids = boxes[:, 0:1][keep_idx[0]] pred_tlwhs = np.concatenate( @@ -168,7 +176,7 @@ class SDE_Detector(Detector): pred_dets = np.concatenate( (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) - return pred_dets[over_thres_idx], pred_xyxys[over_thres_idx] + return pred_dets, pred_xyxys def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): ''' @@ -220,6 +228,142 @@ class SDE_Detector(Detector): return pred_dets, pred_xyxys +class SDE_DetectorPicoDet(DetectorPicoDet): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + super(SDE_DetectorPicoDet, self).__init__( + 
pred_config=pred_config, + model_dir=model_dir, + device=device, + run_mode=run_mode, + batch_size=batch_size, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + self.pred_config = pred_config + + def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, threshold): + over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] + if len(over_thres_idx) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + else: + boxes = boxes[over_thres_idx] + + pred_bboxes = boxes[:, 2:] + + pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape, + scale_factor) + if len(keep_idx[0]) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + + pred_scores = boxes[:, 1:2][keep_idx[0]] + pred_cls_ids = boxes[:, 0:1][keep_idx[0]] + pred_tlwhs = np.concatenate( + (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), + axis=1) + + pred_dets = np.concatenate( + (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) + return pred_dets, pred_xyxys + + def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image (np.ndarray): image numpy data + threshold (float): threshold of predicted box' score + scaled (bool): whether the coords after detector outputs are scaled, + default False in jde yolov3, set True in general detector. + Returns: + pred_dets (np.ndarray, [N, 6]) + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image) + self.det_times.preprocess_time_s.end() + + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + np_score_list, np_boxes_list = [], [] + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + boxes = boxes_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + np_score_list.clear() + np_boxes_list.clear() + output_names = self.predictor.get_output_names() + num_outs = int(len(output_names) / 2) + for out_idx in range(num_outs): + np_score_list.append( + self.predictor.get_output_handle(output_names[out_idx]) + .copy_to_cpu()) + np_boxes_list.append( + self.predictor.get_output_handle(output_names[ + out_idx + num_outs]).copy_to_cpu()) + + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + self.postprocess = PicoDetPostProcess( + inputs['image'].shape[2:], + inputs['im_shape'], + inputs['scale_factor'], + strides=self.pred_config.fpn_stride, + nms_threshold=self.pred_config.nms['nms_threshold']) + boxes, boxes_num = self.postprocess(np_score_list, np_boxes_list) + + if len(boxes) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + else: + input_shape = inputs['image'].shape[2:] + im_shape = inputs['im_shape'] + scale_factor = inputs['scale_factor'] + pred_dets, pred_xyxys = self.postprocess_bboxes( + boxes, input_shape, im_shape, scale_factor, threshold) + + return pred_dets, pred_xyxys + 
+ class SDE_ReID(object): def __init__(self, pred_config, @@ -350,7 +494,7 @@ def predict_image(detector, reid_model, image_list): pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled, FLAGS.threshold) - if len(pred_dets) == 1 and sum(pred_dets) == 0: + if len(pred_dets) == 1 and np.sum(pred_dets) == 0: print('Frame {} has no object, try to modify score threshold.'. format(i)) online_im = frame @@ -407,7 +551,7 @@ def predict_video(detector, reid_model, camera_id): pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled, FLAGS.threshold) - if len(pred_dets) == 1 and sum(pred_dets) == 0: + if len(pred_dets) == 1 and np.sum(pred_dets) == 0: print('Frame {} has no object, try to modify score threshold.'. format(frame_id)) timer.toc() @@ -464,17 +608,21 @@ def predict_video(detector, reid_model, camera_id): def main(): pred_config = PredictConfig(FLAGS.model_dir) - detector = SDE_Detector( - pred_config, - FLAGS.model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn) + detector_func = 'SDE_Detector' + if pred_config.arch == 'PicoDet': + detector_func = 'SDE_DetectorPicoDet' + + detector = eval(detector_func)(pred_config, + FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn) pred_config = PredictConfig(FLAGS.reid_model_dir) reid_model = SDE_ReID( diff --git a/ppdet/data/source/mot.py b/ppdet/data/source/mot.py index 5662b2c4b..9ae49d1ca 100644 --- a/ppdet/data/source/mot.py +++ b/ppdet/data/source/mot.py @@ -154,7 +154,7 @@ class MOTDataSet(DetDataset): last_index += v self.num_identities_dict = defaultdict(int) - self.num_identities_dict[0] = int(last_index + 1) # single class + self.num_identities_dict[0] = int(last_index + 1) # single class self.num_imgs_each_data = [len(x) for x in self.img_files.values()] self.total_imgs = sum(self.num_imgs_each_data) @@ -249,6 +249,7 @@ class MCMOTDataSet(DetDataset): └——————labels_with_ids └——————train """ + def __init__(self, dataset_dir=None, image_lists=[], @@ -343,22 +344,26 @@ class MCMOTDataSet(DetDataset): # cname2cid and cid2cname cname2cid = {} - if self.label_list: + if self.label_list is not None: # if use label_list for multi source mix dataset, # please make sure label_list in the first sub_dataset at least. sub_dataset = self.image_lists[0].split('.')[0] label_path = os.path.join(self.dataset_dir, sub_dataset, self.label_list) if not os.path.exists(label_path): - raise ValueError("label_list {} does not exists".format( - label_path)) - with open(label_path, 'r') as fr: - label_id = 0 - for line in fr.readlines(): - cname2cid[line.strip()] = label_id - label_id += 1 + logger.info( + "Note: label_list {} does not exists, use VisDrone 10 classes labels as default.". 
+ format(label_path)) + cname2cid = visdrone_mcmot_label() + else: + with open(label_path, 'r') as fr: + label_id = 0 + for line in fr.readlines(): + cname2cid[line.strip()] = label_id + label_id += 1 else: cname2cid = visdrone_mcmot_label() + cid2cname = dict([(v, k) for (k, v) in cname2cid.items()]) logger.info('MCMOT dataset summary: ') diff --git a/ppdet/engine/tracker.py b/ppdet/engine/tracker.py index 2644ce72d..75602cb64 100644 --- a/ppdet/engine/tracker.py +++ b/ppdet/engine/tracker.py @@ -176,6 +176,7 @@ class Tracker(object): save_dir=None, show_image=False, frame_rate=30, + seq_name='', scaled=False, det_file='', draw_threshold=0): @@ -200,23 +201,31 @@ class Tracker(object): logger.info('Processing frame {} ({:.2f} fps)'.format( frame_id, 1. / max(1e-5, timer.average_time))) - ori_image = data['ori_image'] + ori_image = data['ori_image'] # [bs, H, W, 3] + ori_image_shape = data['ori_image'].shape[1:3] + # ori_image_shape: [H, W] + input_shape = data['image'].shape[2:] - im_shape = data['im_shape'] - scale_factor = data['scale_factor'] + # input_shape: [h, w], before data transforms, set in model config + + im_shape = data['im_shape'][0].numpy() + # im_shape: [new_h, new_w], after data transforms + scale_factor = data['scale_factor'][0].numpy() + + empty_detections = False + # when it has no detected bboxes, will not inference reid model + # and if visualize, use original image instead # forward timer.tic() if not use_detector: dets = dets_list[frame_id] - bbox_tlwh = paddle.to_tensor(dets['bbox'], dtype='float32') + bbox_tlwh = np.array(dets['bbox'], dtype='float32') if bbox_tlwh.shape[0] > 0: # detector outputs: pred_cls_ids, pred_scores, pred_bboxes - pred_cls_ids = paddle.to_tensor( - dets['cls_id'], dtype='float32').unsqueeze(1) - pred_scores = paddle.to_tensor( - dets['score'], dtype='float32').unsqueeze(1) - pred_bboxes = paddle.concat( + pred_cls_ids = np.array(dets['cls_id'], dtype='float32') + pred_scores = np.array(dets['score'], dtype='float32') + pred_bboxes = np.concatenate( (bbox_tlwh[:, 0:2], bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]), axis=1) @@ -224,16 +233,21 @@ class Tracker(object): logger.warning( 'Frame {} has not object, try to modify score threshold.'. format(frame_id)) - frame_id += 1 - continue + empty_detections = True else: outs = self.model.detector(data) - if outs['bbox_num'] > 0: + outs['bbox'] = outs['bbox'].numpy() + outs['bbox_num'] = outs['bbox_num'].numpy() + + if outs['bbox_num'] > 0 and empty_detections == False: # detector outputs: pred_cls_ids, pred_scores, pred_bboxes pred_cls_ids = outs['bbox'][:, 0:1] pred_scores = outs['bbox'][:, 1:2] if not scaled: - # scaled means whether the coords after detector outputs + # Note: scaled=False only in JDE YOLOv3 or other detectors + # with LetterBoxResize and JDEBBoxPostProcess. + # + # 'scaled' means whether the coords after detector outputs # have been scaled back to the original image, set True # in general detector, set False in JDE YOLOv3. pred_bboxes = scale_coords(outs['bbox'][:, 2:], @@ -243,20 +257,36 @@ class Tracker(object): pred_bboxes = outs['bbox'][:, 2:] else: logger.warning( - 'Frame {} has not object, try to modify score threshold.'. + 'Frame {} has not detected object, try to modify score threshold.'. format(frame_id)) - frame_id += 1 - continue + empty_detections = True + + if not empty_detections: + pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape) + if len(keep_idx[0]) == 0: + logger.warning( + 'Frame {} has not detected object left after clip_box.'. 
+ format(frame_id)) + empty_detections = True + + if empty_detections: + timer.toc() + # if visualize, use original image instead + online_ids, online_tlwhs, online_scores = None, None, None + save_vis_results(data, frame_id, online_ids, online_tlwhs, + online_scores, timer.average_time, show_image, + save_dir, self.cfg.num_classes) + frame_id += 1 + # thus will not inference reid model + continue - pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape, - scale_factor) - pred_scores = paddle.gather_nd(pred_scores, keep_idx).unsqueeze(1) - pred_cls_ids = paddle.gather_nd(pred_cls_ids, keep_idx).unsqueeze(1) - pred_tlwhs = paddle.concat( + pred_scores = pred_scores[keep_idx[0]] + pred_cls_ids = pred_cls_ids[keep_idx[0]] + pred_tlwhs = np.concatenate( (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), axis=1) - pred_dets = paddle.concat( + pred_dets = np.concatenate( (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) tracker = self.model.tracker @@ -268,8 +298,7 @@ class Tracker(object): crops = paddle.to_tensor(crops) data.update({'crops': crops}) - pred_embs = self.model(data) - pred_dets, pred_embs = pred_dets.numpy(), pred_embs.numpy() + pred_embs = self.model(data).numpy() tracker.predict() online_targets = tracker.update(pred_dets, pred_embs) @@ -361,6 +390,7 @@ class Tracker(object): save_dir=save_dir, show_image=show_image, frame_rate=frame_rate, + seq_name=seq, scaled=scaled, det_file=os.path.join(det_results_dir, '{}.txt'.format(seq))) @@ -417,19 +447,19 @@ class Tracker(object): logger.info("Found {} inference images in total.".format(len(images))) return images - def mot_predict(self, - video_file, - frame_rate, - image_dir, - output_dir, - data_type='mot', - model_type='JDE', - save_images=False, - save_videos=True, - show_image=False, - scaled=False, - det_results_dir='', - draw_threshold=0.5): + def mot_predict_seq(self, + video_file, + frame_rate, + image_dir, + output_dir, + data_type='mot', + model_type='JDE', + save_images=False, + save_videos=True, + show_image=False, + scaled=False, + det_results_dir='', + draw_threshold=0.5): assert video_file is not None or image_dir is not None, \ "--video_file or --image_dir should be set." 
assert video_file is None or os.path.isfile(video_file), \ @@ -452,6 +482,8 @@ class Tracker(object): logger.info('Starting tracking video {}'.format(video_file)) elif image_dir: seq = image_dir.split('/')[-1].split('.')[0] + if os.path.exists(os.path.join(image_dir, 'img1')): + image_dir = os.path.join(image_dir, 'img1') images = [ '{}/{}'.format(image_dir, x) for x in os.listdir(image_dir) ] @@ -484,6 +516,7 @@ class Tracker(object): save_dir=save_dir, show_image=show_image, frame_rate=frame_rate, + seq_name=seq, scaled=scaled, det_file=os.path.join(det_results_dir, '{}.txt'.format(seq)), @@ -491,9 +524,6 @@ class Tracker(object): else: raise ValueError(model_type) - write_mot_results(result_filename, results, data_type, - self.cfg.num_classes) - if save_videos: output_video_path = os.path.join(save_dir, '..', '{}_vis.mp4'.format(seq)) @@ -501,3 +531,6 @@ class Tracker(object): save_dir, output_video_path) os.system(cmd_str) logger.info('Save video in {}'.format(output_video_path)) + + write_mot_results(result_filename, results, data_type, + self.cfg.num_classes) diff --git a/ppdet/modeling/architectures/jde.py b/ppdet/modeling/architectures/jde.py index 7057a1da9..11b45c8c1 100644 --- a/ppdet/modeling/architectures/jde.py +++ b/ppdet/modeling/architectures/jde.py @@ -16,8 +16,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import paddle -from ppdet.modeling.mot.utils import scale_coords from ppdet.core.workspace import register, create from .meta_arch import BaseArch @@ -73,8 +71,11 @@ class JDE(BaseArch): emb_feats = det_outs['emb_feats'] loss_confs = det_outs['det_losses']['loss_confs'] loss_boxes = det_outs['det_losses']['loss_boxes'] - jde_losses = self.reid(emb_feats, self.inputs, loss_confs, - loss_boxes) + jde_losses = self.reid( + emb_feats, + self.inputs, + loss_confs=loss_confs, + loss_boxes=loss_boxes) return jde_losses else: if self.metric == 'MOTDet': @@ -84,32 +85,18 @@ class JDE(BaseArch): } return det_results - elif self.metric == 'ReID': - emb_feats = det_outs['emb_feats'] - embs_and_gts = self.reid(emb_feats, self.inputs, test_emb=True) - return embs_and_gts - elif self.metric == 'MOT': emb_feats = det_outs['emb_feats'] - emb_outs = self.reid(emb_feats, self.inputs) - + bboxes = det_outs['bbox'] boxes_idx = det_outs['boxes_idx'] - bbox = det_outs['bbox'] - - input_shape = self.inputs['image'].shape[2:] - im_shape = self.inputs['im_shape'] - scale_factor = self.inputs['scale_factor'] - - bbox[:, 2:] = scale_coords(bbox[:, 2:], input_shape, im_shape, - scale_factor) - nms_keep_idx = det_outs['nms_keep_idx'] - pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2], bbox[:, 0:1]), axis=1) - - emb_valid = paddle.gather_nd(emb_outs, boxes_idx) - pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx) - + pred_dets, pred_embs = self.reid( + emb_feats, + self.inputs, + bboxes=bboxes, + boxes_idx=boxes_idx, + nms_keep_idx=nms_keep_idx) return pred_dets, pred_embs else: diff --git a/ppdet/modeling/mot/tracker/base_sde_tracker.py b/ppdet/modeling/mot/tracker/base_sde_tracker.py index 99a20f422..1ab48d487 100644 --- a/ppdet/modeling/mot/tracker/base_sde_tracker.py +++ b/ppdet/modeling/mot/tracker/base_sde_tracker.py @@ -87,6 +87,7 @@ class Track(object): self.state = TrackState.Tentative self.features = [] + self.feat = feature if feature is not None: self.features.append(feature) @@ -125,6 +126,7 @@ class Track(object): self.covariance, detection.to_xyah()) self.features.append(detection.feature) + self.feat = 
detection.feature self.cls_id = detection.cls_id self.score = detection.score diff --git a/ppdet/modeling/mot/utils.py b/ppdet/modeling/mot/utils.py index 1a39713fa..b3657d257 100644 --- a/ppdet/modeling/mot/utils.py +++ b/ppdet/modeling/mot/utils.py @@ -15,9 +15,8 @@ import os import cv2 import time -import paddle import numpy as np -from .visualization import plot_tracking_dict +from .visualization import plot_tracking_dict, plot_tracking __all__ = [ 'MOTTimer', @@ -157,14 +156,26 @@ def save_vis_results(data, if show_image or save_dir is not None: assert 'ori_image' in data img0 = data['ori_image'].numpy()[0] - online_im = plot_tracking_dict( - img0, - num_classes, - online_tlwhs, - online_ids, - online_scores, - frame_id=frame_id, - fps=1. / average_time) + if online_ids is None: + online_im = img0 + else: + if isinstance(online_tlwhs, dict): + online_im = plot_tracking_dict( + img0, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=1. / average_time) + else: + online_im = plot_tracking( + img0, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=1. / average_time) if show_image: cv2.imshow('online_im', online_im) if save_dir is not None: @@ -186,45 +197,45 @@ def load_det_results(det_file, num_frames): # [frame_id],[x0],[y0],[w],[h],[score],[class_id] for l in lables_with_frame: results['bbox'].append(l[1:5]) - results['score'].append(l[5]) - results['cls_id'].append(l[6]) + results['score'].append(l[5:6]) + results['cls_id'].append(l[6:7]) results_list.append(results) return results_list def scale_coords(coords, input_shape, im_shape, scale_factor): - im_shape = im_shape.numpy()[0] - ratio = scale_factor[0][0] + # Note: ratio has only one value, scale_factor[0] == scale_factor[1] + # + # This function only used for JDE YOLOv3 or other detectors with + # LetterBoxResize and JDEBBoxPostProcess, coords output from detector had + # not scaled back to the origin image. 
+ + ratio = scale_factor[0] pad_w = (input_shape[1] - int(im_shape[1])) / 2 pad_h = (input_shape[0] - int(im_shape[0])) / 2 - coords = paddle.cast(coords, 'float32') coords[:, 0::2] -= pad_w coords[:, 1::2] -= pad_h coords[:, 0:4] /= ratio - coords[:, :4] = paddle.clip(coords[:, :4], min=0, max=coords[:, :4].max()) + coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max()) return coords.round() -def clip_box(xyxy, input_shape, im_shape, scale_factor): - im_shape = im_shape.numpy()[0] - ratio = scale_factor.numpy()[0][0] - img0_shape = [int(im_shape[0] / ratio), int(im_shape[1] / ratio)] - - xyxy[:, 0::2] = paddle.clip(xyxy[:, 0::2], min=0, max=img0_shape[1]) - xyxy[:, 1::2] = paddle.clip(xyxy[:, 1::2], min=0, max=img0_shape[0]) +def clip_box(xyxy, ori_image_shape): + H, W = ori_image_shape + xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W) + xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H) w = xyxy[:, 2:3] - xyxy[:, 0:1] h = xyxy[:, 3:4] - xyxy[:, 1:2] - mask = paddle.logical_and(h > 0, w > 0) - keep_idx = paddle.nonzero(mask) - xyxy = paddle.gather_nd(xyxy, keep_idx[:, :1]) - return xyxy, keep_idx + mask = np.logical_and(h > 0, w > 0) + keep_idx = np.nonzero(mask) + return xyxy[keep_idx[0]], keep_idx def get_crops(xyxy, ori_img, w, h): crops = [] - xyxy = xyxy.numpy().astype(np.int64) + xyxy = xyxy.astype(np.int64) ori_img = ori_img.numpy() - ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) + ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) # [h,w,3]->[w,h,3] for i, bbox in enumerate(xyxy): crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] crops.append(crop) diff --git a/ppdet/modeling/mot/visualization.py b/ppdet/modeling/mot/visualization.py index 36cd3ba6d..6d13a2877 100644 --- a/ppdet/modeling/mot/visualization.py +++ b/ppdet/modeling/mot/visualization.py @@ -28,7 +28,7 @@ def plot_tracking(image, scores=None, frame_id=0, fps=0., - ids2=None): + ids2names=[]): im = np.ascontiguousarray(np.copy(image)) im_h, im_w = im.shape[:2] @@ -52,15 +52,17 @@ def plot_tracking(image, intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) obj_id = int(obj_ids[i]) id_text = '{}'.format(int(obj_id)) - if ids2 is not None: - id_text = id_text + ', {}'.format(int(ids2[i])) + if ids2names != []: + assert len( + ids2names) == 1, "plot_tracking only supports single classes." 
+ id_text = '{}_'.format(ids2names[0]) + id_text _line_thickness = 1 if obj_id <= 0 else line_thickness color = get_color(abs(obj_id)) cv2.rectangle( im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) cv2.putText( im, - id_text, (intbox[0], intbox[1] + 10), + id_text, (intbox[0], intbox[1] - 10), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=text_thickness) @@ -69,7 +71,7 @@ def plot_tracking(image, text = '{:.2f}'.format(float(scores[i])) cv2.putText( im, - text, (intbox[0], intbox[1] - 10), + text, (intbox[0], intbox[1] + 10), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=text_thickness) @@ -83,7 +85,7 @@ def plot_tracking_dict(image, scores_dict, frame_id=0, fps=0., - ids2=None): + ids2names=[]): im = np.ascontiguousarray(np.copy(image)) im_h, im_w = im.shape[:2] @@ -111,10 +113,12 @@ def plot_tracking_dict(image, x1, y1, w, h = tlwh intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) obj_id = int(obj_ids[i]) - if num_classes == 1: - id_text = '{}'.format(int(obj_id)) + + id_text = '{}'.format(int(obj_id)) + if ids2names != []: + id_text = '{}_{}'.format(ids2names[cls_id], id_text) else: - id_text = 'class{}_id{}'.format(cls_id, int(obj_id)) + id_text = 'class{}_{}'.format(cls_id, id_text) _line_thickness = 1 if obj_id <= 0 else line_thickness color = get_color(abs(obj_id)) @@ -126,7 +130,7 @@ def plot_tracking_dict(image, thickness=line_thickness) cv2.putText( im, - id_text, (intbox[0], intbox[1] + 10), + id_text, (intbox[0], intbox[1] - 10), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=text_thickness) @@ -135,7 +139,7 @@ def plot_tracking_dict(image, text = '{:.2f}'.format(float(scores[i])) cv2.putText( im, - text, (intbox[0], intbox[1] - 10), + text, (intbox[0], intbox[1] + 10), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=text_thickness) diff --git a/ppdet/modeling/necks/centernet_fpn.py b/ppdet/modeling/necks/centernet_fpn.py index 1ca1a4b58..9f2bb04b8 100755 --- a/ppdet/modeling/necks/centernet_fpn.py +++ b/ppdet/modeling/necks/centernet_fpn.py @@ -270,6 +270,8 @@ class CenterNetDLAFPN(nn.Layer): feat = ida_up_feats[-1] if self.with_sge: feat = self.sge_attention(feat) + if self.down_ratio != 4: + feat = F.interpolate(feat, scale_factor=self.down_ratio // 4, mode="bilinear", align_corners=True) return feat @property diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py index 86dbaf9c0..679e09134 100644 --- a/ppdet/modeling/post_process.py +++ b/ppdet/modeling/post_process.py @@ -440,13 +440,13 @@ class CenterNetPostProcess(TTFBox): def __call__(self, hm, wh, reg, im_shape, scale_factor): heat = self._simple_nms(hm) scores, inds, topk_clses, ys, xs = self._topk(heat) - scores = paddle.tensor.unsqueeze(scores, [1]) - clses = paddle.tensor.unsqueeze(topk_clses, [1]) + scores = scores.unsqueeze(1) + clses = topk_clses.unsqueeze(1) reg_t = paddle.transpose(reg, [0, 2, 3, 1]) # Like TTFBox, batch size is 1. 
# TODO: support batch size > 1 - reg = paddle.reshape(reg_t, [-1, paddle.shape(reg_t)[-1]]) + reg = paddle.reshape(reg_t, [-1, reg_t.shape[-1]]) reg = paddle.gather(reg, inds) xs = paddle.cast(xs, 'float32') ys = paddle.cast(ys, 'float32') @@ -454,7 +454,7 @@ class CenterNetPostProcess(TTFBox): ys = ys + reg[:, 1:2] wh_t = paddle.transpose(wh, [0, 2, 3, 1]) - wh = paddle.reshape(wh_t, [-1, paddle.shape(wh_t)[-1]]) + wh = paddle.reshape(wh_t, [-1, wh_t.shape[-1]]) wh = paddle.gather(wh, inds) if self.regress_ltrb: @@ -486,8 +486,7 @@ class CenterNetPostProcess(TTFBox): scale_x = scale_factor[:, 1:2] scale_expand = paddle.concat( [scale_x, scale_y, scale_x, scale_y], axis=1) - boxes_shape = paddle.shape(bboxes) - boxes_shape.stop_gradient = True + boxes_shape = bboxes.shape[:] scale_expand = paddle.expand(scale_expand, shape=boxes_shape) bboxes = paddle.divide(bboxes, scale_expand) if self.for_mot: diff --git a/ppdet/modeling/reid/fairmot_embedding_head.py b/ppdet/modeling/reid/fairmot_embedding_head.py index f2f24eac6..98ca257fd 100755 --- a/ppdet/modeling/reid/fairmot_embedding_head.py +++ b/ppdet/modeling/reid/fairmot_embedding_head.py @@ -59,15 +59,11 @@ class FairMOTEmbeddingHead(nn.Layer): self.reid_loss = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum') if num_classes == 1: - nID = self.num_identities_dict[0] # single class + nID = self.num_identities_dict[0] # single class self.classifier = nn.Linear( - ch_emb, - nID, - weight_attr=param_attr, - bias_attr=bias_attr) + ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr) # When num_identities(nID) is 1, emb_scale is set as 1 - self.emb_scale = math.sqrt(2) * math.log( - nID - 1) if nID > 1 else 1 + self.emb_scale = math.sqrt(2) * math.log(nID - 1) if nID > 1 else 1 else: self.classifiers = dict() self.emb_scale_dict = dict() @@ -84,7 +80,7 @@ class FairMOTEmbeddingHead(nn.Layer): input_shape = input_shape[0] return {'in_channels': input_shape.channels} - def process_by_class(self, det_outs, embedding, bbox_inds, topk_clses): + def process_by_class(self, bboxes, embedding, bbox_inds, topk_clses): pred_dets, pred_embs = [], [] for cls_id in range(self.num_classes): inds_masks = topk_clses == cls_id @@ -97,8 +93,8 @@ class FairMOTEmbeddingHead(nn.Layer): cls_inds_mask = inds_masks > 0 bbox_mask = paddle.nonzero(cls_inds_mask) - cls_det_outs = paddle.gather_nd(det_outs, bbox_mask) - pred_dets.append(cls_det_outs) + cls_bboxes = paddle.gather_nd(bboxes, bbox_mask) + pred_dets.append(cls_bboxes) cls_inds = paddle.masked_select(bbox_inds, cls_inds_mask) cls_inds = cls_inds.unsqueeze(-1) @@ -108,12 +104,12 @@ class FairMOTEmbeddingHead(nn.Layer): return paddle.concat(pred_dets), paddle.concat(pred_embs) def forward(self, - feat, + neck_feat, inputs, - det_outs=None, + bboxes=None, bbox_inds=None, topk_clses=None): - reid_feat = self.reid(feat) + reid_feat = self.reid(neck_feat) if self.training: if self.num_classes == 1: loss = self.get_loss(reid_feat, inputs) @@ -121,18 +117,18 @@ class FairMOTEmbeddingHead(nn.Layer): loss = self.get_mc_loss(reid_feat, inputs) return loss else: - assert det_outs is not None and bbox_inds is not None + assert bboxes is not None and bbox_inds is not None reid_feat = F.normalize(reid_feat) embedding = paddle.transpose(reid_feat, [0, 2, 3, 1]) embedding = paddle.reshape(embedding, [-1, self.ch_emb]) # embedding shape: [bs * h * w, ch_emb] if self.num_classes == 1: - pred_dets = det_outs + pred_dets = bboxes pred_embs = paddle.gather(embedding, bbox_inds) else: pred_dets, pred_embs = 
self.process_by_class( - det_outs, embedding, bbox_inds, topk_clses) + bboxes, embedding, bbox_inds, topk_clses) return pred_dets, pred_embs def get_loss(self, feat, inputs): diff --git a/ppdet/modeling/reid/jde_embedding_head.py b/ppdet/modeling/reid/jde_embedding_head.py index ff2ddf763..c35f8cfb0 100644 --- a/ppdet/modeling/reid/jde_embedding_head.py +++ b/ppdet/modeling/reid/jde_embedding_head.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function import math +import numpy as np import paddle import paddle.nn as nn import paddle.nn.functional as F @@ -115,31 +116,58 @@ class JDEEmbeddingHead(nn.Layer): def forward(self, identify_feats, - targets=None, + targets, loss_confs=None, loss_boxes=None, - test_emb=False): + bboxes=None, + boxes_idx=None, + nms_keep_idx=None): + assert self.num_classes == 1, 'JDE only support sindle class MOT.' assert len(identify_feats) == self.anchor_levels ide_outs = [] for feat, ide_head in zip(identify_feats, self.identify_outputs): ide_outs.append(ide_head(feat)) if self.training: - assert targets != None assert len(loss_confs) == len(loss_boxes) == self.anchor_levels loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale, self.classifier) - return self.jde_loss(loss_confs, loss_boxes, loss_ides, - self.loss_params_cls, self.loss_params_reg, - self.loss_params_ide, targets) + jde_losses = self.jde_loss( + loss_confs, loss_boxes, loss_ides, self.loss_params_cls, + self.loss_params_reg, self.loss_params_ide, targets) + return jde_losses else: - if test_emb: - assert targets != None - embs_and_gts = self.get_emb_and_gt_outs(ide_outs, targets) - return embs_and_gts - else: - emb_outs = self.get_emb_outs(ide_outs) - return emb_outs + assert bboxes is not None + assert boxes_idx is not None + assert nms_keep_idx is not None + + emb_outs = self.get_emb_outs(ide_outs) + emb_valid = paddle.gather_nd(emb_outs, boxes_idx) + pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx) + + input_shape = targets['image'].shape[2:] + # input_shape: [h, w], before data transforms, set in model config + im_shape = targets['im_shape'][0].numpy() + # im_shape: [new_h, new_w], after data transforms + scale_factor = targets['scale_factor'][0].numpy() + bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape, + im_shape, scale_factor) + # tlwhs, scores, cls_ids + pred_dets = paddle.concat( + (bboxes[:, 2:], bboxes[:, 1:2], bboxes[:, 0:1]), axis=1) + return pred_dets, pred_embs + + def scale_coords(self, coords, input_shape, im_shape, scale_factor): + ratio = scale_factor[0] + pad_w = (input_shape[1] - int(im_shape[1])) / 2 + pad_h = (input_shape[0] - int(im_shape[0])) / 2 + coords = paddle.cast(coords, 'float32') + coords[:, 0::2] -= pad_w + coords[:, 1::2] -= pad_h + coords[:, 0:4] /= ratio + coords[:, :4] = paddle.clip( + coords[:, :4], min=0, max=coords[:, :4].max()) + return coords.round() def get_emb_and_gt_outs(self, ide_outs, targets): emb_and_gts = [] diff --git a/ppdet/modeling/reid/pyramidal_embedding.py b/ppdet/modeling/reid/pyramidal_embedding.py index 10bb92b4c..a90d4e1ef 100644 --- a/ppdet/modeling/reid/pyramidal_embedding.py +++ b/ppdet/modeling/reid/pyramidal_embedding.py @@ -37,7 +37,8 @@ class PCBPyramid(nn.Layer): input_ch (int): Number of channels of the input feature. num_stripes (int): Number of sub-parts. used_levels (tuple): Whether the level is used, 1 means used. - num_classes (int): Number of classes for identities. + num_classes (int): Number of classes for identities, default 751 in + Market-1501 dataset. 
         last_conv_stride (int): Stride of the last conv.
         last_conv_dilation (int): Dilation of the last conv.
         num_conv_out_channels (int): Number of channels of conv feature.
diff --git a/tools/infer_mot.py b/tools/infer_mot.py
index 9054f1575..29122f01f 100644
--- a/tools/infer_mot.py
+++ b/tools/infer_mot.py
@@ -103,7 +103,7 @@ def run(FLAGS, cfg):
         tracker.load_weights_jde(cfg.weights)
 
     # inference
-    tracker.mot_predict(
+    tracker.mot_predict_seq(
         video_file=FLAGS.video_file,
         frame_rate=FLAGS.frame_rate,
         image_dir=FLAGS.image_dir,
-- 
GitLab
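Editor's note: for readers skimming the patch, the detection-to-tracker hand-off that the new NumPy post-processing implements (in `deploy/python/mot_sde_infer.py` and `ppdet/modeling/mot/utils.py`) can be summarized in a standalone sketch. This is a minimal illustration, not the shipped code; the sample box values are made up, and boxes are assumed to arrive in the `[class_id, score, x1, y1, x2, y2]` layout used by the exported detectors.

```python
# Minimal sketch of the pred_dets assembly added in this patch:
# score filtering -> clip to the original image -> tlwh conversion -> [N, 6] dets.
import numpy as np


def clip_box(xyxy, ori_image_shape):
    # Clip x1y1x2y2 boxes to the original image and drop zero-area boxes.
    H, W = ori_image_shape
    xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W)
    xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H)
    w = xyxy[:, 2:3] - xyxy[:, 0:1]
    h = xyxy[:, 3:4] - xyxy[:, 1:2]
    mask = np.logical_and(h > 0, w > 0)
    keep_idx = np.nonzero(mask)
    return xyxy[keep_idx[0]], keep_idx


def make_pred_dets(boxes, ori_image_shape, threshold=0.5):
    # boxes: [N, 6] as [class_id, score, x1, y1, x2, y2], already in original-image coords.
    over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
    if len(over_thres_idx) == 0:
        return np.zeros((1, 6), dtype=np.float32), np.zeros((1, 4), dtype=np.float32)
    boxes = boxes[over_thres_idx]

    pred_xyxys, keep_idx = clip_box(boxes[:, 2:], ori_image_shape)
    if len(keep_idx[0]) == 0:
        return np.zeros((1, 6), dtype=np.float32), np.zeros((1, 4), dtype=np.float32)

    pred_scores = boxes[:, 1:2][keep_idx[0]]
    pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
    # tlwh = (x1, y1, w, h); the tracker consumes [x, y, w, h, score, class_id].
    pred_tlwhs = np.concatenate(
        (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), axis=1)
    pred_dets = np.concatenate((pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
    return pred_dets, pred_xyxys


if __name__ == '__main__':
    boxes = np.array([
        [0., 0.90, -10., 20., 100., 200.],   # kept, clipped to x1=0
        [0., 0.30, 300., 300., 400., 380.],  # dropped by the score threshold
        [0., 0.85, 900., 500., 950., 520.],  # clipped to zero area and dropped
    ])
    dets, xyxys = make_pred_dets(boxes, ori_image_shape=(480, 864), threshold=0.5)
    print(dets)  # one surviving det: [0, 20, 101, 181, 0.9, 0]
```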
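The patch also rewrites `scale_coords` (used when the detector is a JDE YOLOv3 with LetterBoxResize, i.e. `--scaled=False`) on top of NumPy, both in `ppdet/modeling/mot/utils.py` and inside `JDEEmbeddingHead`. A small worked example, with assumed shapes chosen only for illustration, shows how a coordinate on the padded model input maps back to the original frame.

```python
# Sketch of the letterbox undo performed by scale_coords in this patch.
# Assumed for illustration: a 608x960 frame resized with ratio 1.0 and padded
# to the 608x1088 model input (so pad_w = 64, pad_h = 0).
import numpy as np


def scale_coords(coords, input_shape, im_shape, scale_factor):
    # input_shape: model input [h, w]; im_shape: image size after resize, before padding;
    # scale_factor: resize ratio (same for both axes with LetterBoxResize).
    ratio = scale_factor[0]
    pad_w = (input_shape[1] - int(im_shape[1])) / 2
    pad_h = (input_shape[0] - int(im_shape[0])) / 2
    coords[:, 0::2] -= pad_w
    coords[:, 1::2] -= pad_h
    coords[:, 0:4] /= ratio
    coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max())
    return coords.round()


if __name__ == '__main__':
    # One box detected on the padded 608x1088 input.
    boxes = np.array([[164., 50., 364., 250.]])
    out = scale_coords(boxes, input_shape=(608, 1088),
                       im_shape=np.array([608., 960.]),
                       scale_factor=np.array([1.0, 1.0]))
    print(out)  # [[100. 50. 300. 250.]] -> coordinates on the original 608x960 frame
```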