diff --git a/configs/datasets/dota_ms.yml b/configs/datasets/dota_ms.yml new file mode 100644 index 0000000000000000000000000000000000000000..802e8846d7f443a7032cf49a88bfe79328ea41db --- /dev/null +++ b/configs/datasets/dota_ms.yml @@ -0,0 +1,21 @@ +metric: RBOX +num_classes: 15 + +TrainDataset: + !COCODataSet + image_dir: trainval1024/images + anno_path: trainval1024/DOTA_trainval1024.json + dataset_dir: dataset/dota_ms/ + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] + +EvalDataset: + !COCODataSet + image_dir: trainval1024/images + anno_path: trainval1024/DOTA_trainval1024.json + dataset_dir: dataset/dota_ms/ + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] + +TestDataset: + !ImageFolder + anno_path: test1024/DOTA_test1024.json + dataset_dir: dataset/dota_ms/ diff --git a/configs/rotate/README.md b/configs/rotate/README.md index 574cb4ed5ece2992b7d04587bac977ba19f0d5a1..5558c4a873386687f7761d1481dfcab8eb07dc3e 100644 --- a/configs/rotate/README.md +++ b/configs/rotate/README.md @@ -16,7 +16,15 @@ | 模型 | mAP | 学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 | |:---:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:| | [S2ANet](./s2anet/README.md) | 73.84 | 2x | le135 | - | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/s2anet/s2anet_alignconv_2x_dota.yml) | -| [FCOSR](./fcosr/README.md) | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) | +| [FCOSR](./fcosr/README.md) | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) | +| [PP-YOLOE-R-s](./ppyoloe_r/README.md) | 73.82 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) | +| [PP-YOLOE-R-s](./ppyoloe_r/README.md) | 79.42 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) | +| [PP-YOLOE-R-m](./ppyoloe_r/README.md) | 77.64 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) | +| [PP-YOLOE-R-m](./ppyoloe_r/README.md) | 79.71 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) | +| [PP-YOLOE-R-l](./ppyoloe_r/README.md) | 78.14 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) | +| [PP-YOLOE-R-l](./ppyoloe_r/README.md) | 80.02 | 3x | oc | MS+RR | 4 | 2 | 
[model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) | +| [PP-YOLOE-R-x](./ppyoloe_r/README.md) | 78.28 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) | +| [PP-YOLOE-R-x](./ppyoloe_r/README.md) | 80.73 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) | **注意:** diff --git a/configs/rotate/README_en.md b/configs/rotate/README_en.md index ef5160ec9f4f0b8f8670a7a0989a05b2be5b982d..fc746ae1283b3f04ebc2592025318c9a34e6fd4e 100644 --- a/configs/rotate/README_en.md +++ b/configs/rotate/README_en.md @@ -15,7 +15,15 @@ Rotated object detection is used to detect rectangular bounding boxes with angle | Model | mAP | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config | |:---:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:| | [S2ANet](./s2anet/README_en.md) | 73.84 | 2x | le135 | - | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/s2anet/s2anet_alignconv_2x_dota.yml) | -| [FCOSR](./fcosr/README_en.md) | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) | +| [FCOSR](./fcosr/README_en.md) | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) | +| [PP-YOLOE-R-s](./ppyoloe_r/README_en.md) | 73.82 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) | +| [PP-YOLOE-R-s](./ppyoloe_r/README_en.md) | 79.42 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) | +| [PP-YOLOE-R-m](./ppyoloe_r/README_en.md) | 77.64 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) | +| [PP-YOLOE-R-m](./ppyoloe_r/README_en.md) | 79.71 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) | +| [PP-YOLOE-R-l](./ppyoloe_r/README_en.md) | 78.14 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) | 
+| [PP-YOLOE-R-l](./ppyoloe_r/README_en.md) | 80.02 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README_en.md) | 78.28 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README_en.md) | 80.73 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |

 **Notes:**
diff --git a/configs/rotate/fcosr/README.md b/configs/rotate/fcosr/README.md
index 0113ee1f8d6a9796a8bb91c02787308dd8bbac48..19888eea5a4f60faf611824b4cf4bf2e569b59b9 100644
--- a/configs/rotate/fcosr/README.md
+++ b/configs/rotate/fcosr/README.md
@@ -17,7 +17,7 @@
 | 模型 | Backbone | mAP | 学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 |
 |:---:|:--------:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
-| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |

 **注意:**
diff --git a/configs/rotate/fcosr/README_en.md b/configs/rotate/fcosr/README_en.md
index cf8e49ae47ad2d12badfd5ddfa89cbb3bc3eabe1..ee16d0edb1ca0c312757a5c0402a180c3e502bd2 100644
--- a/configs/rotate/fcosr/README_en.md
+++ b/configs/rotate/fcosr/README_en.md
@@ -17,7 +17,7 @@ English | [简体中文](README.md)
 | Model | Backbone | mAP | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config |
 |:---:|:--------:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
-| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |

 **Notes:**
diff --git a/configs/rotate/ppyoloe_r/README.md b/configs/rotate/ppyoloe_r/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0892942ee3c2a11132580c5a24eaf3d866b7b16c
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/README.md
@@ -0,0 +1,147 @@
+简体中文 | [English](README_en.md)
+
+# PP-YOLOE-R
+
+## 内容
+- [简介](#简介)
+- [模型库](#模型库)
+- [使用说明](#使用说明)
+- [预测部署](#预测部署)
+- [附录](#附录)
+- [引用](#引用)
+
+## 简介
+PP-YOLOE-R是一个高效的单阶段Anchor-free旋转框检测模型。基于PP-YOLOE,PP-YOLOE-R以极少的参数量和计算量为代价,引入了一系列有用的设计来提升检测精度。在DOTA 1.0数据集上,PP-YOLOE-R-l和PP-YOLOE-R-x在单尺度训练和测试的情况下分别达到了78.14和78.28 mAP,这超越了几乎所有的旋转框检测模型。通过多尺度训练和测试,PP-YOLOE-R-l和PP-YOLOE-R-x的检测精度进一步提升至80.02和80.73
mAP。在这种情况下,PP-YOLOE-R-x超越了所有的anchor-free方法并且和最先进的anchor-based的两阶段模型精度几乎相当。此外,PP-YOLOE-R-s和PP-YOLOE-R-m通过多尺度训练和测试可以达到79.42和79.71 mAP。考虑到这两个模型的参数量和计算量,其性能也非常卓越。在保持高精度的同时,PP-YOLOE-R避免使用特殊的算子,例如Deformable Convolution或Rotated RoI Align,以使其能轻松地部署在多种多样的硬件上。在1024x1024的输入分辨率下,PP-YOLOE-R-s/m/l/x在RTX 2080 Ti上使用TensorRT FP16分别能达到69.8/55.1/48.3/37.1 FPS,在Tesla V100上分别能达到114.5/86.8/69.7/50.7 FPS。更多细节可以参考我们的技术报告。 + +
+<div align="center">
+  <img src="../../../docs/images/ppyoloe_r_map_fps.png" width=500 />
+</div>
+
+PP-YOLOE-R相较于PP-YOLOE做了以下几点改动:
+- Rotated Task Alignment Learning
+- 解耦的角度预测头
+- 使用DFL进行角度预测
+- 可学习的门控单元
+- [ProbIoU损失函数](https://arxiv.org/abs/2106.06072)
+
+## 模型库
+
+| 模型 | Backbone | mAP | V100 TRT FP16 (FPS) | RTX 2080 Ti TRT FP16 (FPS) | 学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 |
+|:---:|:--------:|:----:|:--------------------:|:------------:|:--------------------:|:-----:|:--------:|:-------:|:------:|:-----------:|:------:|
+| PP-YOLOE-R-s | CRN-s | 73.82 | 114.5 | 69.8 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| PP-YOLOE-R-s | CRN-s | 79.42 | 114.5 | 69.8 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| PP-YOLOE-R-m | CRN-m | 77.64 | 86.8 | 55.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| PP-YOLOE-R-m | CRN-m | 79.71 | 86.8 | 55.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| PP-YOLOE-R-l | CRN-l | 78.14 | 69.7 | 48.3 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| PP-YOLOE-R-l | CRN-l | 80.02 | 69.7 | 48.3 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| PP-YOLOE-R-x | CRN-x | 78.28 | 50.7 | 37.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| PP-YOLOE-R-x | CRN-x | 80.73 | 50.7 | 37.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
+
+**注意:**
+
+- 如果**GPU卡数**或者**batch size**发生了改变,你需要按照公式 **lr<sub>new</sub> = lr<sub>default</sub> * (batch_size<sub>new</sub> * GPU_number<sub>new</sub>) / (batch_size<sub>default</sub> * GPU_number<sub>default</sub>)** 调整学习率。
+- 模型库中的模型默认使用单尺度训练、单尺度测试。如果数据增广一栏标明MS,意味着使用多尺度训练和多尺度测试;如果数据增广一栏标明RR,意味着训练时使用了RandomRotate数据增广。
+- CRN表示PP-YOLOE中提出的CSPRepResNet。
+- 速度指标为使用TensorRT 8.2.3在DOTA测试集上测试2000张图片取平均值的结果,可参考[速度测试](#速度测试)一节复现。
+
+## 使用说明
+
+参考[数据准备](../README.md#数据准备)准备数据。
+
+### 训练
+
+GPU单卡训练
+``` bash
+CUDA_VISIBLE_DEVICES=0 python tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+GPU多卡训练
+``` bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+### 预测
+
+执行以下命令预测单张图片,图片预测结果默认保存在`output`文件夹下:
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_img=demo/P0861__1.0__1154___824.png --draw_threshold=0.5
+```
+
+### DOTA数据集评估
+
+参考[DOTA Task](https://captain-whu.github.io/DOTA/tasks.html),评估DOTA数据集需要生成一个包含所有检测结果的zip文件,每一类的检测结果存储在一个txt文件中,txt文件中每行的格式为:`image_name score x1 y1 x2 y2 x3 y3 x4 y4`(格式示例见下方代码片段)。将生成的zip文件提交到[DOTA Evaluation](https://captain-whu.github.io/DOTA/evaluation.html)的Task1进行评估。你可以执行以下命令得到test数据集的预测结果:
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_dir=/path/to/test/images --output_dir=output_ppyoloe_r --visualize=False --save_results=True
+```
+将预测结果处理成官网评估所需要的格式:
+``` bash
+python configs/rotate/tools/generate_result.py --pred_txt_dir=output_ppyoloe_r/ --output_dir=submit/ --data_type=dota10
+
+zip -r submit.zip submit
+```
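+
+下面的Python片段示意了上述提交文件的组织方式(仅为说明格式的示例代码,`results`的数据结构与`submit/`目录均为假设,并非仓库中的实现):
+``` python
+import os
+
+# 假设的检测结果: {类别名: [(图片名, 得分, [x1, y1, x2, y2, x3, y3, x4, y4]), ...]}
+results = {
+    'plane': [('P0006', 0.9512, [103.5, 204.0, 150.2, 204.0, 150.2, 250.8, 103.5, 250.8])],
+}
+
+os.makedirs('submit', exist_ok=True)
+for class_name, dets in results.items():
+    # 每个类别写入一个txt文件,每行: image_name score x1 y1 x2 y2 x3 y3 x4 y4
+    with open(os.path.join('submit', 'Task1_{}.txt'.format(class_name)), 'w') as f:
+        for image_name, score, poly in dets:
+            coords = ' '.join('{:.1f}'.format(p) for p in poly)
+            f.write('{} {:.4f} {}\n'.format(image_name, score, coords))
+```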
+
+### 速度测试
+
+速度测试需要确保**TensorRT版本大于8.2且PaddlePaddle版本大于2.4.0rc0**。使用Paddle Inference且开启TensorRT进行测速,执行以下命令:
+
+``` bash
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# 速度测试
+CUDA_VISIBLE_DEVICES=0 python configs/rotate/tools/inference_benchmark.py --model_dir output_inference/ppyoloe_r_crn_l_3x_dota/ --image_dir /path/to/dota/test/dir --run_mode trt_fp16
+```
+
+## 预测部署
+
+**使用Paddle Inference但不使用TensorRT**进行部署,执行以下命令:
+``` bash
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams
+
+# 预测图片
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=paddle --device=gpu
+```
+
+**使用Paddle Inference且使用TensorRT**进行部署,执行以下命令:
+``` bash
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# 预测图片
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=trt_fp16 --device=gpu
+```
+
+**注意:**
+- 使用Paddle-TRT时需确保PaddlePaddle版本大于2.4.0rc且TensorRT版本大于8.2。
+- 除上述命令行工具外,也可以直接使用Paddle Inference的Python API完成部署,见下方示例。
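+
+下面的Python片段给出了用Paddle Inference加载上述导出模型并执行一次前向的最小示意(API用法与`configs/rotate/tools/inference_benchmark.py`一致;输入为随机张量、模型路径仅为示例,实际预处理请参考`deploy/python/infer.py`):
+``` python
+import numpy as np
+from paddle.inference import Config, create_predictor
+
+# 加载导出的推理模型(路径为示例)
+config = Config('output_inference/ppyoloe_r_crn_l_3x_dota/model.pdmodel',
+                'output_inference/ppyoloe_r_crn_l_3x_dota/model.pdiparams')
+config.enable_use_gpu(200, 0)  # 初始显存(MB)与GPU卡号
+predictor = create_predictor(config)
+
+# 构造1024x1024的随机输入,仅用于演示前向流程
+inputs = {
+    'image': np.random.rand(1, 3, 1024, 1024).astype('float32'),
+    'im_shape': np.array([[1024., 1024.]], dtype='float32'),
+    'scale_factor': np.ones((1, 2), dtype='float32'),
+}
+for name in predictor.get_input_names():
+    predictor.get_input_handle(name).copy_from_cpu(inputs[name])
+predictor.run()
+output = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
+```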
+
+
+## 附录
+
+PP-YOLOE-R消融实验
+
+| 模型 | mAP | 参数量(M) | FLOPs(G) |
+| :-: | :-: | :------: | :------: |
+| Baseline | 75.61 | 50.65 | 269.09 |
+| +Rotated Task Alignment Learning | 77.24 | 50.65 | 269.09 |
+| +Decoupled Angle Prediction Head | 77.78 | 52.20 | 272.72 |
+| +Angle Prediction with DFL | 78.01 | 53.29 | 281.65 |
+| +Learnable Gating Unit for RepVGG | 78.14 | 53.29 | 281.65 |
+
+
+## 引用
+
+```
+@article{xu2022pp,
+  title={PP-YOLOE: An evolved version of YOLO},
+  author={Xu, Shangliang and Wang, Xinxin and Lv, Wenyu and Chang, Qinyao and Cui, Cheng and Deng, Kaipeng and Wang, Guanzhong and Dang, Qingqing and Wei, Shengyu and Du, Yuning and others},
+  journal={arXiv preprint arXiv:2203.16250},
+  year={2022}
+}
+
+@article{llerena2021gaussian,
+  title={Gaussian Bounding Boxes and Probabilistic Intersection-over-Union for Object Detection},
+  author={Llerena, Jeffri M and Zeni, Luis Felipe and Kristen, Lucas N and Jung, Claudio},
+  journal={arXiv preprint arXiv:2106.06072},
+  year={2021}
+}
+```
diff --git a/configs/rotate/ppyoloe_r/README_en.md b/configs/rotate/ppyoloe_r/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..b98cc736fde4b5aa7a392f5f8972f1d869be6c16
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/README_en.md
@@ -0,0 +1,145 @@
+English | [简体中文](README.md)
+
+# PP-YOLOE-R
+
+## Content
+- [Introduction](#Introduction)
+- [Model Zoo](#Model-Zoo)
+- [Getting Started](#Getting-Started)
+- [Deployment](#Deployment)
+- [Appendix](#Appendix)
+- [Citations](#Citations)
+
+## Introduction
+PP-YOLOE-R is an efficient anchor-free rotated object detector. Based on PP-YOLOE, PP-YOLOE-R introduces a bag of useful tricks to improve detection precision at the expense of marginal parameters and computations. PP-YOLOE-R-l and PP-YOLOE-R-x achieve 78.14 and 78.28 mAP respectively on the DOTA 1.0 dataset with single-scale training and testing, outperforming almost all other rotated object detectors. With multi-scale training and testing, the detection precision of PP-YOLOE-R-l and PP-YOLOE-R-x is further improved to 80.02 and 80.73 mAP. In this case, PP-YOLOE-R-x surpasses all anchor-free methods and demonstrates competitive performance with state-of-the-art anchor-based two-stage models. Moreover, PP-YOLOE-R-s and PP-YOLOE-R-m can achieve 79.42 and 79.71 mAP with multi-scale training and testing, which is an excellent result considering the parameters and GFLOPs of these two models. While maintaining high precision, PP-YOLOE-R avoids using special operators, such as Deformable Convolution or Rotated RoI Align, so that it can be easily deployed on a wide range of hardware. At an input resolution of 1024x1024, PP-YOLOE-R-s/m/l/x can reach 69.8/55.1/48.3/37.1 FPS on RTX 2080 Ti and 114.5/86.8/69.7/50.7 FPS on Tesla V100 GPU with TensorRT and FP16 precision. For more details, please refer to our technical report.
+
+<div align="center">
+  <img src="../../../docs/images/ppyoloe_r_map_fps.png" width=500 />
+</div>
+
+Compared with PP-YOLOE, PP-YOLOE-R has made the following changes:
+- Rotated Task Alignment Learning
+- Decoupled Angle Prediction Head
+- Angle Prediction with DFL
+- Learnable Gating Unit for RepVGG
+- [ProbIoU Loss](https://arxiv.org/abs/2106.06072)
+
+## Model Zoo
+| Model | Backbone | mAP | V100 TRT FP16 (FPS) | RTX 2080 Ti TRT FP16 (FPS) | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config |
+|:---:|:--------:|:----:|:--------------------:|:------------:|:--------------------:|:-----:|:--------:|:-------:|:------:|:-----------:|:------:|
+| PP-YOLOE-R-s | CRN-s | 73.82 | 114.5 | 69.8 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| PP-YOLOE-R-s | CRN-s | 79.42 | 114.5 | 69.8 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| PP-YOLOE-R-m | CRN-m | 77.64 | 86.8 | 55.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| PP-YOLOE-R-m | CRN-m | 79.71 | 86.8 | 55.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| PP-YOLOE-R-l | CRN-l | 78.14 | 69.7 | 48.3 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| PP-YOLOE-R-l | CRN-l | 80.02 | 69.7 | 48.3 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| PP-YOLOE-R-x | CRN-x | 78.28 | 50.7 | 37.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| PP-YOLOE-R-x | CRN-x | 80.73 | 50.7 | 37.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
+
+**Notes:**
+
+- If **GPU number** or **mini-batch size** is changed, the **learning rate** should be adjusted according to the formula **lr<sub>new</sub> = lr<sub>default</sub> * (batch_size<sub>new</sub> * GPU_number<sub>new</sub>) / (batch_size<sub>default</sub> * GPU_number<sub>default</sub>)**, as illustrated by the snippet after these notes.
+- Models in the model zoo are trained and tested at single scale by default. If `MS` is indicated in the data augmentation column, multi-scale training and multi-scale testing are used. If `RR` is indicated in the data augmentation column, RandomRotate data augmentation is used for training.
+- CRN denotes the CSPRepResNet proposed in PP-YOLOE.
+- Speed is calculated and averaged by testing 2000 images on the DOTA test dataset. Refer to [Speed testing](#Speed-testing) to reproduce the results.
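+
+As a quick illustration of the formula above, the following snippet computes the adjusted learning rate (the default values correspond to the 4-GPU, 2-images-per-GPU, `base_lr: 0.008` setting used by these configs; the new setting is hypothetical):
+``` python
+# defaults of PP-YOLOE-R: base_lr = 0.008, 2 images per GPU, 4 GPUs
+lr_default, bs_default, gpus_default = 0.008, 2, 4
+
+# suppose training is switched to 4 images per GPU on 8 GPUs
+bs_new, gpus_new = 4, 8
+
+lr_new = lr_default * (bs_new * gpus_new) / (bs_default * gpus_default)
+print(lr_new)  # 0.032
+```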
+
+## Getting Started
+
+Refer to [Data-Preparation](../README_en.md#Data-Preparation) to prepare data.
+
+### Training
+
+Single GPU Training
+``` bash
+CUDA_VISIBLE_DEVICES=0 python tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+Multiple GPUs Training
+``` bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+### Inference
+
+Run the following command to infer a single image. The inference result will be saved in the `output` directory by default.
+
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_img=demo/P0861__1.0__1154___824.png --draw_threshold=0.5
+```
+
+### Evaluation on DOTA Dataset
+Referring to the [DOTA Task](https://captain-whu.github.io/DOTA/tasks.html), you need to submit a zip file containing the results for all test images for evaluation. The detection results of each category are stored in a txt file, each line of which is in the following format: `image_id score x1 y1 x2 y2 x3 y3 x4 y4`, where the eight numbers are the four corner points of the rotated box (see the sketch after this section). To evaluate, submit the generated zip file to Task1 of the [DOTA Evaluation](https://captain-whu.github.io/DOTA/evaluation.html) server. You can run the following command to get the inference results of the test dataset:
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_dir=/path/to/test/images --output_dir=output_ppyoloe_r --visualize=False --save_results=True
+```
+Process the prediction results into the format required for the official website evaluation:
+``` bash
+python configs/rotate/tools/generate_result.py --pred_txt_dir=output_ppyoloe_r/ --output_dir=submit/ --data_type=dota10
+
+zip -r submit.zip submit
+```
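+
+Since each submission line stores the four corner points of a rotated box, the following sketch shows how a box in `(cx, cy, w, h, angle)` form can be converted into the `x1 y1 ... x4 y4` polygon expected by the evaluation server (`rbox2poly` is an illustrative helper, not code from this repo; the angle is assumed to be in radians):
+``` python
+import numpy as np
+
+def rbox2poly(cx, cy, w, h, angle):
+    """Convert one rotated box to its 4 corner points."""
+    cos_a, sin_a = np.cos(angle), np.sin(angle)
+    # half-extent vectors along the box's width and height directions
+    vx = np.array([cos_a, sin_a]) * w / 2.0
+    vy = np.array([-sin_a, cos_a]) * h / 2.0
+    center = np.array([cx, cy])
+    corners = [center + vx + vy, center + vx - vy,
+               center - vx - vy, center - vx + vy]
+    return np.concatenate(corners)  # [x1, y1, x2, y2, x3, y3, x4, y4]
+
+print(rbox2poly(100.0, 100.0, 40.0, 20.0, np.pi / 6))
+```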
+
+### Speed testing
+
+To test speed, make sure that **the version of TensorRT is higher than 8.2 and the version of PaddlePaddle is higher than 2.4.0rc**. To measure speed using Paddle Inference with TensorRT, run the following command:
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# speed testing
+CUDA_VISIBLE_DEVICES=0 python configs/rotate/tools/inference_benchmark.py --model_dir output_inference/ppyoloe_r_crn_l_3x_dota/ --image_dir /path/to/dota/test/dir --run_mode trt_fp16
+```
+
+## Deployment
+
+To deploy **using Paddle Inference without TensorRT**, run the following command:
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams
+
+# inference single image
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=paddle --device=gpu
+```
+
+To deploy **using Paddle Inference with TensorRT**, run the following command:
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# inference single image
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=trt_fp16 --device=gpu
+```
+
+## Appendix
+
+Ablation experiments of PP-YOLOE-R
+
+| Model | mAP | Params(M) | FLOPs(G) |
+| :-: | :-: | :------: | :------: |
+| Baseline | 75.61 | 50.65 | 269.09 |
+| +Rotated Task Alignment Learning | 77.24 | 50.65 | 269.09 |
+| +Decoupled Angle Prediction Head | 77.78 | 52.20 | 272.72 |
+| +Angle Prediction with DFL | 78.01 | 53.29 | 281.65 |
+| +Learnable Gating Unit for RepVGG | 78.14 | 53.29 | 281.65 |
+
+## Citations
+
+```
+@article{xu2022pp,
+  title={PP-YOLOE: An evolved version of YOLO},
+  author={Xu, Shangliang and Wang, Xinxin and Lv, Wenyu and Chang, Qinyao and Cui, Cheng and Deng, Kaipeng and Wang, Guanzhong and Dang, Qingqing and Wei, Shengyu and Du, Yuning and others},
+  journal={arXiv preprint arXiv:2203.16250},
+  year={2022}
+}
+
+@article{llerena2021gaussian,
+  title={Gaussian Bounding Boxes and Probabilistic Intersection-over-Union for Object Detection},
+  author={Llerena, Jeffri M and Zeni, Luis Felipe and Kristen, Lucas N and Jung, Claudio},
+  journal={arXiv preprint arXiv:2106.06072},
+  year={2021}
+}
+```
diff --git a/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml b/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1cdad4beb093deeef0b6918b88b81fc5964e95ce
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml
@@ -0,0 +1,19 @@
+epoch: 36
+
+LearningRate:
+  base_lr: 0.008
+  schedulers:
+    - !CosineDecay
+      max_epochs: 44
+    - !LinearWarmup
+      start_factor: 0.
+      steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.
+ optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab5bdb50aa731e3af664b68aa52b3c7293d715e8 --- /dev/null +++ b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml @@ -0,0 +1,49 @@ +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOERHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + use_alpha: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + use_alpha: True + +PPYOLOERHead: + fpn_strides: [32, 16, 8] + grid_cell_offset: 0.5 + use_varifocal_loss: true + static_assigner_epoch: -1 + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.05} + static_assigner: + name: FCOSRAssigner + factor: 12 + threshold: 0.23 + boundary: [[512, 10000], [256, 512], [-1, 256]] + assigner: + name: RotatedTaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 2000 + keep_top_k: -1 + score_threshold: 0.1 + nms_threshold: 0.1 + normalized: False diff --git a/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..aa9de88b60d62a09b38bfbe2a4b7af55c43626b1 --- /dev/null +++ b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml @@ -0,0 +1,45 @@ +worker_num: 4 +image_height: &image_height 1024 +image_width: &image_width 1024 +image_size: &image_size [*image_height, *image_width] + +TrainReader: + sample_transforms: + - Decode: {} + - Poly2Array: {} + - RandomRFlip: {} + - RandomRRotate: {angle_mode: 'value', angle: [0, 90, 180, -90]} + - RandomRRotate: {angle_mode: 'value', angle: [30, 60], rotate_prob: 0.5} + - RResize: {target_size: *image_size, keep_ratio: True, interp: 2} + - Poly2RBox: {filter_threshold: 2, filter_mode: 'edge', rbox_type: 'oc'} + batch_transforms: + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + - PadRGT: {} + - PadBatch: {pad_to_stride: 32} + batch_size: 2 + shuffle: true + drop_last: true + use_shared_memory: true + collate_batch: true + +EvalReader: + sample_transforms: + - Decode: {} + - Poly2Array: {} + - RResize: {target_size: *image_size, keep_ratio: True, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 2 + +TestReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: *image_size, keep_ratio: True, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 8 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml new file mode 100644 index 0000000000000000000000000000000000000000..b019d736c19b35423cb536eea0cf0e55036c2af7 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + 
+log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_l_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml new file mode 100644 index 0000000000000000000000000000000000000000..a1411a3153dfae89d722d4895039b15370094c45 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota_ms.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_l_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml new file mode 100644 index 0000000000000000000000000000000000000000..755cf3f4e5bb93072779cf83344124c6d28cb925 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_m_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_m_pretrained.pdparams +depth_mult: 0.67 +width_mult: 0.75 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml new file mode 100644 index 0000000000000000000000000000000000000000..d885b459ff61f5ab7b3dcdcf55b80f1d6a3d6a4f --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota_ms.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_m_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_m_pretrained.pdparams +depth_mult: 0.67 +width_mult: 0.75 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml new file mode 100644 index 0000000000000000000000000000000000000000..a227f18ac2ddb93e7af79d2452ea7e043cfe3eb0 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_s_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_s_pretrained.pdparams +depth_mult: 0.33 +width_mult: 0.50 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml new file mode 100644 index 0000000000000000000000000000000000000000..921a9d571b730d3f57865e51baca6d37080d42a1 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota_ms.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + 
'_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_s_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_s_pretrained.pdparams +depth_mult: 0.33 +width_mult: 0.50 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml new file mode 100644 index 0000000000000000000000000000000000000000..d81b5ef9861fcef9e044c792894f671886037182 --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_x_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_x_pretrained.pdparams +depth_mult: 1.33 +width_mult: 1.25 diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml new file mode 100644 index 0000000000000000000000000000000000000000..d99cdb0787109cdd88054d15967ddf4bfbb2b52f --- /dev/null +++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml @@ -0,0 +1,15 @@ +_BASE_: [ + '../../datasets/dota_ms.yml', + '../../runtime.yml', + '_base_/optimizer_3x.yml', + '_base_/ppyoloe_r_reader.yml', + '_base_/ppyoloe_r_crn.yml' +] + +log_iter: 50 +snapshot_epoch: 1 +weights: output/ppyoloe_r_crn_x_3x_dota/model_final + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_x_pretrained.pdparams +depth_mult: 1.33 +width_mult: 1.25 diff --git a/configs/rotate/tools/inference_benchmark.py b/configs/rotate/tools/inference_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..dcce2d2feafacc2d00cae4b16252265d92776c91 --- /dev/null +++ b/configs/rotate/tools/inference_benchmark.py @@ -0,0 +1,356 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import six +import glob +import time +import yaml +import argparse +import cv2 +import numpy as np + +import paddle +import paddle.version as paddle_version +from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version + + +TUNED_TRT_DYNAMIC_MODELS = {'DETR'} + +def check_version(version='2.2'): + err = "PaddlePaddle version {} or higher is required, " \ + "or a suitable develop version is satisfied as well. 
\n" \ + "Please make sure the version is good with your code.".format(version) + + version_installed = [ + paddle_version.major, paddle_version.minor, paddle_version.patch, + paddle_version.rc + ] + + if version_installed == ['0', '0', '0', '0']: + return + + version_split = version.split('.') + + length = min(len(version_installed), len(version_split)) + for i in six.moves.range(length): + if version_installed[i] > version_split[i]: + return + if version_installed[i] < version_split[i]: + raise Exception(err) + + +def check_trt_version(version='8.2'): + err = "TensorRT version {} or higher is required," \ + "Please make sure the version is good with your code.".format(version) + version_split = list(map(int, version.split('.'))) + version_installed = get_trt_runtime_version() + length = min(len(version_installed), len(version_split)) + for i in six.moves.range(length): + if version_installed[i] > version_split[i]: + return + if version_installed[i] < version_split[i]: + raise Exception(err) + + +# preprocess ops +def decode_image(im_file, im_info): + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = im_file + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) + return im, im_info + +class Resize(object): + + def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + im_info['scale_factor'] = np.array( + [im_scale_y, im_scale_x]).astype('float32') + return im, im_info + + def generate_scale(self, im): + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + +class Permute(object): + + def __init__(self, ): + super(Permute, self).__init__() + + def __call__(self, im, im_info): + im = im.transpose((2, 0, 1)) + return im, im_info + +class NormalizeImage(object): + def __init__(self, mean, std, is_scale=True, norm_type='mean_std'): + self.mean = mean + self.std = std + self.is_scale = is_scale + self.norm_type = norm_type + + def __call__(self, im, im_info): + im = im.astype(np.float32, copy=False) + if self.is_scale: + scale = 1.0 / 255.0 + im *= scale + + if self.norm_type == 'mean_std': + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im 
-= mean + im /= std + return im, im_info + + +class PadStride(object): + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + coarsest_stride = self.coarsest_stride + if coarsest_stride <= 0: + return im, im_info + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +def preprocess(im, preprocess_ops): + # process image by preprocess_ops + im_info = { + 'scale_factor': np.array( + [1., 1.], dtype=np.float32), + 'im_shape': None, + } + im, im_info = decode_image(im, im_info) + for operator in preprocess_ops: + im, im_info = operator(im, im_info) + return im, im_info + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--model_dir', type=str, help='directory of inference model') + parser.add_argument('--run_mode', type=str, default='paddle', help='running mode') + parser.add_argument('--batch_size', type=int, default=1, help='batch size') + parser.add_argument('--image_dir', type=str, default='/paddle/data/DOTA_1024_ss/test1024/images', help='directory of test images') + parser.add_argument('--warmup_iter', type=int, default=5, help='num of warmup iters') + parser.add_argument('--total_iter', type=int, default=2000, help='num of total iters') + parser.add_argument('--log_iter', type=int, default=50, help='num of log interval') + parser.add_argument('--tuned_trt_shape_file', type=str, default='shape_range_info.pbtxt', help='dynamic shape range info') + args = parser.parse_args() + return args + + +def init_predictor(FLAGS): + model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size + yaml_file = os.path.join(model_dir, 'infer_cfg.yml') + with open(yaml_file) as f: + yml_conf = yaml.safe_load(f) + + config = Config( + os.path.join(model_dir, 'model.pdmodel'), + os.path.join(model_dir, 'model.pdiparams')) + + # initial GPU memory(M), device ID + config.enable_use_gpu(200, 0) + # optimize graph and fuse op + config.switch_ir_optim(True) + + precision_map = { + 'trt_int8': Config.Precision.Int8, + 'trt_fp32': Config.Precision.Float32, + 'trt_fp16': Config.Precision.Half + } + + arch = yml_conf['arch'] + tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file) + + if run_mode in precision_map.keys(): + if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(tuned_trt_shape_file): + print('dynamic shape range info is saved in {}. 
After that, rerun the code'.format(tuned_trt_shape_file)) + config.collect_shape_range_info(tuned_trt_shape_file) + config.enable_tensorrt_engine( + workspace_size=(1 << 25) * batch_size, + max_batch_size=batch_size, + min_subgraph_size=yml_conf['min_subgraph_size'], + precision_mode=precision_map[run_mode], + use_static=True, + use_calib_mode=False) + + if yml_conf['use_dynamic_shape']: + if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(tuned_trt_shape_file): + config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file, True) + else: + min_input_shape = { + 'image': [batch_size, 3, 640, 640], + 'scale_factor': [batch_size, 2] + } + max_input_shape = { + 'image': [batch_size, 3, 1280, 1280], + 'scale_factor': [batch_size, 2] + } + opt_input_shape = { + 'image': [batch_size, 3, 1024, 1024], + 'scale_factor': [batch_size, 2] + } + config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, + opt_input_shape) + + # disable print log when predict + config.disable_glog_info() + # enable shared memory + config.enable_memory_optim() + # disable feed, fetch OP, needed by zero_copy_run + config.switch_use_feed_fetch_ops(False) + predictor = create_predictor(config) + return predictor, yml_conf + +def create_preprocess_ops(yml_conf): + preprocess_ops = [] + for op_info in yml_conf['Preprocess']: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + return preprocess_ops + + +def get_test_images(image_dir): + images = set() + infer_dir = os.path.abspath(image_dir) + exts = ['jpg', 'jpeg', 'png', 'bmp'] + exts += [ext.upper() for ext in exts] + for ext in exts: + images.update(glob.glob('{}/*.{}'.format(infer_dir, ext))) + images = list(images) + return images + + +def create_inputs(image_files, preprocess_ops): + inputs = dict() + im_list, im_info_list = [], [] + for im_path in image_files: + im, im_info = preprocess(im_path, preprocess_ops) + im_list.append(im) + im_info_list.append(im_info) + + inputs['im_shape'] = np.stack([e['im_shape'] for e in im_info_list], axis=0).astype('float32') + inputs['scale_factor'] = np.stack([e['scale_factor'] for e in im_info_list], axis=0).astype('float32') + inputs['image'] = np.stack(im_list, axis=0).astype('float32') + return inputs + + +def measure_speed(FLAGS): + predictor, yml_conf = init_predictor(FLAGS) + input_names = predictor.get_input_names() + preprocess_ops = create_preprocess_ops(yml_conf) + + image_files = get_test_images(FLAGS.image_dir) + + batch_size = FLAGS.batch_size + warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter + + total_time = 0 + fps = 0 + for i in range(0, total_iter, batch_size): + # make data ready + inputs = create_inputs(image_files[i:i + batch_size], preprocess_ops) + for name in input_names: + input_tensor = predictor.get_input_handle(name) + input_tensor.copy_from_cpu(inputs[name]) + + paddle.device.cuda.synchronize() + # start running + start_time = time.perf_counter() + predictor.run() + paddle.device.cuda.synchronize() + + if i >= warmup_iter: + total_time += time.perf_counter() - start_time + if (i + 1) % log_iter == 0: + fps = (i + 1 - warmup_iter) / total_time + print( + f'Done image [{i + 1:<3}/ {total_iter}], ' + f'fps: {fps:.1f} img / s, ' + f'times per image: {1000 / fps:.1f} ms / img', + flush=True) + + if (i + 1) == total_iter: + fps = (i + 1 - warmup_iter) / total_time + print( + f'Overall fps: {fps:.1f} img / s, ' + f'times per image: {1000 / fps:.1f} ms / img', + flush=True) + break + +if 
__name__ == '__main__': + FLAGS = parse_args() + check_version('2.4') + check_trt_version('8.2') + measure_speed(FLAGS) + + + + + + diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 50a835b50da1934876d5824a1b3d08ac32ed2490..7235b009ddb2d1264377a37d835d8cf5b9106b0f 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -110,9 +110,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 ## 旋转框检测 -### S2ANet - -请参考[S2ANet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dota/) +[旋转框检测模型库](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate) ## 关键点检测 diff --git a/docs/MODEL_ZOO_en.md b/docs/MODEL_ZOO_en.md index 599121ac08e77b4b8c346ef5f179d414c2ebc304..c26e5c1db155a63857cab458f426123583218aea 100644 --- a/docs/MODEL_ZOO_en.md +++ b/docs/MODEL_ZOO_en.md @@ -107,12 +107,9 @@ Please refer to[YOLOv6](https://github.com/nemonameless/PaddleDetection_YOLOSeri Please refer to[YOLOv7](https://github.com/nemonameless/PaddleDetection_YOLOSeries/tree/develop/configs/yolov7) -## Rotating frame detection - -### S2ANet - -Please refer to[S2ANet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dota/) +## Rotated Object detection +[Model Zoo for Rotated Object Detection](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate) ## KeyPoint Detection diff --git a/docs/images/ppyoloe_r_map_fps.png b/docs/images/ppyoloe_r_map_fps.png new file mode 100644 index 0000000000000000000000000000000000000000..2d4553b97e96a63c428b08a2da9d0f8880e72be8 Binary files /dev/null and b/docs/images/ppyoloe_r_map_fps.png differ diff --git a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc similarity index 68% rename from ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc rename to ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc index 2c3c58b606c22607272d6d37877d11399d7542d9..b16e8c1f2ef93c322fe062af1735189d3eb98f47 100644 --- a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc +++ b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc @@ -13,14 +13,14 @@ // limitations under the License. 
 //
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/

+#include "../rbox_iou/rbox_iou_utils.h"
 #include "paddle/extension.h"
-#include "rbox_iou_op.h"

 template <typename T>
 void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
-                                const T *rbox2_data_ptr, T *output_data_ptr) {
+                                 const T *rbox2_data_ptr, T *output_data_ptr) {

   int i;
   for (i = 0; i < rbox_num; i++) {
@@ -30,42 +30,43 @@ void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
 }

 #define CHECK_INPUT_CPU(x) \
-  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+  PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")

-std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
-                                                     const paddle::Tensor &rbox2) {
+std::vector<paddle::Tensor>
+MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
+                         const paddle::Tensor &rbox2) {
   CHECK_INPUT_CPU(rbox1);
   CHECK_INPUT_CPU(rbox2);
   PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");

   auto rbox_num = rbox1.shape()[0];

-  auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
+  auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::CPUPlace());

-  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rotated_iou_cpu_kernel", ([&] {
+  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "matched_rbox_iou_cpu_kernel", ([&] {
                                matched_rbox_iou_cpu_kernel<data_t>(
                                    rbox_num, rbox1.data<data_t>(),
-                                   rbox2.data<data_t>(),
-                                   output.mutable_data<data_t>());
+                                   rbox2.data<data_t>(), output.data<data_t>());
                              }));

   return {output};
 }

 #ifdef PADDLE_WITH_CUDA
-std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
-                                                      const paddle::Tensor &rbox2);
+std::vector<paddle::Tensor>
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                          const paddle::Tensor &rbox2);
 #endif

 #define CHECK_INPUT_SAME(x1, x2) \
   PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")

 std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
-                                                 const paddle::Tensor &rbox2) {
+                                                  const paddle::Tensor &rbox2) {
   CHECK_INPUT_SAME(rbox1, rbox2);
-  if (rbox1.place() == paddle::PlaceType::kCPU) {
+  if (rbox1.is_cpu()) {
     return MatchedRboxIouCPUForward(rbox1, rbox2);
 #ifdef PADDLE_WITH_CUDA
-  } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+  } else if (rbox1.is_gpu()) {
     return MatchedRboxIouCUDAForward(rbox1, rbox2);
 #endif
   }
@@ -73,12 +74,12 @@ std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,

 std::vector<std::vector<int64_t>>
 MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape,
-                        std::vector<int64_t> rbox2_shape) {
+                         std::vector<int64_t> rbox2_shape) {
   return {{rbox1_shape[0]}};
 }

 std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1,
-                                                      paddle::DataType t2) {
+                                                       paddle::DataType t2) {
   return {t1};
 }
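As context for review, here is a rough sketch of how these custom operators are called from Python once `ppdet/ext_op` has been compiled (boxes follow the `[N, 5]` layout `(cx, cy, w, h, angle)`; the `ext_op` import path and the exact op signatures follow this package's conventions and should be checked against its unit tests):

``` python
import paddle
from ext_op import matched_rbox_iou, rbox_iou, nms_rotated

rbox1 = paddle.rand([10, 5])
rbox2 = paddle.rand([15, 5])

ious = rbox_iou(rbox1, rbox2)             # pairwise IoU matrix, shape [10, 15]
matched = matched_rbox_iou(rbox1, rbox1)  # element-wise IoU, shape [10]

scores = paddle.rand([10])
keep = nms_rotated(rbox1, scores, 0.1)    # indices kept after rotated NMS
```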
diff --git a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
similarity index 72%
rename from ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
rename to ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
index 8d03ecce6a775162980746adf727738a6beb102b..53454d106392f208e72a5e1d1fd6e9bcf609927f 100644
--- a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
+++ b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
@@ -13,21 +13,15 @@
 // limitations under the License.
 //
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/

+#include "../rbox_iou/rbox_iou_utils.h"
 #include "paddle/extension.h"
-#include "rbox_iou_op.h"
-
-/**
-   Computes ceil(a / b)
-*/
-
-static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }

 template <typename T>
 __global__ void
 matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
-                            const T *rbox2_data_ptr, T *output_data_ptr) {
+                             const T *rbox2_data_ptr, T *output_data_ptr) {
   for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num;
        tid += blockDim.x * gridDim.x) {
     output_data_ptr[tid] =
@@ -36,17 +30,18 @@ matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
 }

 #define CHECK_INPUT_GPU(x) \
-  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+  PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")

-std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
-                                                      const paddle::Tensor &rbox2) {
+std::vector<paddle::Tensor>
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                          const paddle::Tensor &rbox2) {
   CHECK_INPUT_GPU(rbox1);
   CHECK_INPUT_GPU(rbox2);
   PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");

   auto rbox_num = rbox1.shape()[0];

-  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
+  auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::GPUPlace());

   const int thread_per_block = 512;
   const int block_per_grid = CeilDiv(rbox_num, thread_per_block);
@@ -56,7 +51,7 @@ std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox
         matched_rbox_iou_cuda_kernel<
             data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>(
             rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
-            output.mutable_data<data_t>());
+            output.data<data_t>());
       }));

   return {output};
diff --git a/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44f4eb62b851736176f7fade903248e6c95c6d83
--- /dev/null
+++ b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc
@@ -0,0 +1,121 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "../rbox_iou/rbox_iou_utils.h"
+#include "paddle/extension.h"
+
+template <typename T>
+void nms_rotated_cpu_kernel(const T *boxes_data, const float threshold,
+                            const int64_t num_boxes, int64_t *num_keep_boxes,
+                            int64_t *output_data) {
+
+  int num_masks = CeilDiv(num_boxes, 64);
+  std::vector<int64_t> masks(num_masks, 0);
+  for (int64_t i = 0; i < num_boxes; ++i) {
+    if (masks[i / 64] & 1ULL << (i % 64))
+      continue;
+    T box_1[5];
+    for (int k = 0; k < 5; ++k) {
+      box_1[k] = boxes_data[i * 5 + k];
+    }
+    for (int64_t j = i + 1; j < num_boxes; ++j) {
+      if (masks[j / 64] & 1ULL << (j % 64))
+        continue;
+      T box_2[5];
+      for (int k = 0; k < 5; ++k) {
+        box_2[k] = boxes_data[j * 5 + k];
+      }
+      if (rbox_iou_single<T>(box_1, box_2) > threshold) {
+        masks[j / 64] |= 1ULL << (j % 64);
+      }
+    }
+  }
+  int64_t output_data_idx = 0;
+  for (int64_t i = 0; i < num_boxes; ++i) {
+    if (masks[i / 64] & 1ULL << (i % 64))
+      continue;
+    output_data[output_data_idx++] = i;
+  }
+  *num_keep_boxes = output_data_idx;
+  for (; output_data_idx < num_boxes; ++output_data_idx) {
+    output_data[output_data_idx] = 0;
+  }
+}
+
+#define CHECK_INPUT_CPU(x) \
+  PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> NMSRotatedCPUForward(const paddle::Tensor &boxes,
+                                                 const paddle::Tensor &scores,
+                                                 float threshold) {
+  CHECK_INPUT_CPU(boxes);
+  CHECK_INPUT_CPU(scores);
+
+  auto num_boxes = boxes.shape()[0];
+
+  auto order_t =
+      std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true));
+  auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0);
+
+  auto keep =
+      paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace());
+  int64_t num_keep_boxes = 0;
+
+  PD_DISPATCH_FLOATING_TYPES(boxes.type(), "nms_rotated_cpu_kernel", ([&] {
+                               nms_rotated_cpu_kernel<data_t>(
+                                   boxes_sorted.data<data_t>(), threshold,
+                                   num_boxes, &num_keep_boxes,
+                                   keep.data<int64_t>());
+                             }));
+
+  keep = keep.slice(0, num_keep_boxes);
+  return {paddle::gather(order_t, keep, /* axis=*/0)};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> NMSRotatedCUDAForward(const paddle::Tensor &boxes,
+                                                  const paddle::Tensor &scores,
+                                                  float threshold);
+#endif
+
+std::vector<paddle::Tensor> NMSRotatedForward(const paddle::Tensor &boxes,
+                                              const paddle::Tensor &scores,
+                                              float threshold) {
+  if (boxes.is_cpu()) {
+    return NMSRotatedCPUForward(boxes, scores, threshold);
+#ifdef PADDLE_WITH_CUDA
+  } else if (boxes.is_gpu()) {
+    return NMSRotatedCUDAForward(boxes, scores, threshold);
+#endif
+  }
+}
+
+std::vector<std::vector<int64_t>>
+NMSRotatedInferShape(std::vector<int64_t> boxes_shape,
+                     std::vector<int64_t> scores_shape) {
+  return {{-1}};
+}
+
+std::vector<paddle::DataType> NMSRotatedInferDtype(paddle::DataType t1,
+                                                   paddle::DataType t2) {
+  return {paddle::DataType::INT64};
+}
+
+PD_BUILD_OP(nms_rotated)
+    .Inputs({"Boxes", "Scores"})
+    .Outputs({"Output"})
+    .Attrs({"threshold: float"})
+    .SetKernelFn(PD_KERNEL(NMSRotatedForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(NMSRotatedInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(NMSRotatedInferDtype));
\ No newline at end of file
diff --git a/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d20dddb5739619de9fc616c1e0d59941952e73c5
--- /dev/null
+++ b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../rbox_iou/rbox_iou_utils.h" +#include "paddle/extension.h" + +static const int64_t threadsPerBlock = sizeof(int64_t) * 8; + +template <typename T> +__global__ void +nms_rotated_cuda_kernel(const T *boxes_data, const float threshold, + const int64_t num_boxes, int64_t *masks) { + auto row_start = blockIdx.y; + auto col_start = blockIdx.x; + if (row_start > col_start) + return; + const int row_last_storage = + min(num_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_last_storage = + min(num_boxes - col_start * threadsPerBlock, threadsPerBlock); + if (threadIdx.x < row_last_storage) { + int64_t mask = 0; + auto current_box_idx = row_start * threadsPerBlock + threadIdx.x; + const T *current_box = boxes_data + current_box_idx * 5; + for (int i = 0; i < col_last_storage; ++i) { + const T *target_box = boxes_data + (col_start * threadsPerBlock + i) * 5; + if (rbox_iou_single<T>(current_box, target_box) > threshold) { + mask |= 1ULL << i; + } + } + const int blocks_per_line = CeilDiv(num_boxes, threadsPerBlock); + masks[current_box_idx * blocks_per_line + col_start] = mask; + } +} + +#define CHECK_INPUT_GPU(x) \ + PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +std::vector<paddle::Tensor> NMSRotatedCUDAForward(const paddle::Tensor &boxes, + const paddle::Tensor &scores, + float threshold) { + CHECK_INPUT_GPU(boxes); + CHECK_INPUT_GPU(scores); + + auto num_boxes = boxes.shape()[0]; + auto order_t = + std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true)); + auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0); + + const auto blocks_per_line = CeilDiv(num_boxes, threadsPerBlock); + dim3 block(threadsPerBlock); + dim3 grid(blocks_per_line, blocks_per_line); + auto mask_dev = paddle::empty({num_boxes * blocks_per_line}, + paddle::DataType::INT64, paddle::GPUPlace()); + + PD_DISPATCH_FLOATING_TYPES( + boxes.type(), "nms_rotated_cuda_kernel", ([&] { + nms_rotated_cuda_kernel<data_t><<<grid, block, 0, boxes.stream()>>>( + boxes_sorted.data<data_t>(), threshold, num_boxes, + mask_dev.data<int64_t>()); + })); + + auto mask_host = mask_dev.copy_to(paddle::CPUPlace(), true); + auto keep_host = + paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace()); + int64_t *keep_host_ptr = keep_host.data<int64_t>(); + int64_t *mask_host_ptr = mask_host.data<int64_t>(); + std::vector<int64_t> remv(blocks_per_line); + int64_t last_box_num = 0; + for (int64_t i = 0; i < num_boxes; ++i) { + auto remv_element_id = i / threadsPerBlock; + auto remv_bit_id = i % threadsPerBlock; + if (!(remv[remv_element_id] & 1ULL << remv_bit_id)) { + keep_host_ptr[last_box_num++] = i; + int64_t *current_mask = mask_host_ptr + i * blocks_per_line; + for (auto j = remv_element_id; j < blocks_per_line; ++j) { + remv[j] |= current_mask[j]; + } + } + } + + keep_host = keep_host.slice(0, last_box_num); + auto keep_dev = keep_host.copy_to(paddle::GPUPlace(), true); + return {paddle::gather(order_t, keep_dev, /* axis=*/0)}; +} \ No newline at end of file diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc new file mode 100644 index 0000000000000000000000000000000000000000..c8e7528d35857eb39b8be441558876a4130a7ce6 ---
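`threadsPerBlock` is 64 because each mask word packs one 64-box tile of the pairwise suppression matrix: the kernel writes one `int64_t` per (box, tile) pair, and the host loop at the end of `NMSRotatedCUDAForward` ORs those words together in score order. A numpy sketch of that host-side reduction, with `masks` standing in for the copied-back `mask_host` buffer:

```python
import numpy as np

def reduce_masks(masks, num_boxes, threads_per_block=64):
    """masks: uint64 array of length num_boxes * blocks_per_line."""
    blocks_per_line = (num_boxes + threads_per_block - 1) // threads_per_block
    remv = np.zeros(blocks_per_line, dtype=np.uint64)  # suppressed-bit accumulator
    keep = []
    for i in range(num_boxes):  # boxes are already score-sorted
        word, bit = divmod(i, threads_per_block)
        if not (remv[word] >> np.uint64(bit)) & np.uint64(1):
            keep.append(i)  # box i survives; suppress everything it overlaps
            remv |= masks[i * blocks_per_line:(i + 1) * blocks_per_line]
    return keep  # indices into the sorted order, like keep_host in the CUDA code
```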
/dev/null +++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// The code is based on +// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/ + +#include "paddle/extension.h" +#include "rbox_iou_utils.h" + +template <typename T> +void rbox_iou_cpu_kernel(const int rbox1_num, const int rbox2_num, + const T *rbox1_data_ptr, const T *rbox2_data_ptr, + T *output_data_ptr) { + + int i, j; + for (i = 0; i < rbox1_num; i++) { + for (j = 0; j < rbox2_num; j++) { + int offset = i * rbox2_num + j; + output_data_ptr[offset] = + rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5); + } + } +} + +#define CHECK_INPUT_CPU(x) \ + PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") + +std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor &rbox1, + const paddle::Tensor &rbox2) { + CHECK_INPUT_CPU(rbox1); + CHECK_INPUT_CPU(rbox2); + + auto rbox1_num = rbox1.shape()[0]; + auto rbox2_num = rbox2.shape()[0]; + + auto output = + paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::CPUPlace()); + + PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rbox_iou_cpu_kernel", ([&] { + rbox_iou_cpu_kernel<data_t>( + rbox1_num, rbox2_num, rbox1.data<data_t>(), + rbox2.data<data_t>(), output.data<data_t>()); + })); + + return {output}; +} + +#ifdef PADDLE_WITH_CUDA +std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1, + const paddle::Tensor &rbox2); +#endif + +#define CHECK_INPUT_SAME(x1, x2) \ + PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.") + +std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor &rbox1, + const paddle::Tensor &rbox2) { + CHECK_INPUT_SAME(rbox1, rbox2); + if (rbox1.is_cpu()) { + return RboxIouCPUForward(rbox1, rbox2); +#ifdef PADDLE_WITH_CUDA + } else if (rbox1.is_gpu()) { + return RboxIouCUDAForward(rbox1, rbox2); +#endif + } +} + +std::vector<std::vector<int64_t>> +RboxIouInferShape(std::vector<int64_t> rbox1_shape, + std::vector<int64_t> rbox2_shape) { + return {{rbox1_shape[0], rbox2_shape[0]}}; +} + +std::vector<paddle::DataType> RboxIouInferDtype(paddle::DataType t1, + paddle::DataType t2) { + return {t1}; +} + +PD_BUILD_OP(rbox_iou) + .Inputs({"RBox1", "RBox2"}) + .Outputs({"Output"}) + .SetKernelFn(PD_KERNEL(RboxIouForward)) + .SetInferShapeFn(PD_INFER_SHAPE(RboxIouInferShape)) + .SetInferDtypeFn(PD_INFER_DTYPE(RboxIouInferDtype)); diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu similarity index 90% rename from ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu rename to ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu index 16d1d36f1002832d01db826743ce5c57ac557463..baedb6dedba6edbf207f4c68e84ab0b9b03b28ac 100644 --- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu +++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu @@ -13,21 +13,15 @@ // limitations under the License.
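Once the extension is built (e.g. with `python ppdet/ext_op/setup.py install`; the exact build command is an assumption), the op registered above is consumed from Python the same way `rotated_iou_similarity` in ppdet/modeling/rbox_utils.py does it:

```python
import paddle
from ext_op import rbox_iou  # same import used in rbox_utils.py below

rbox1 = paddle.rand([32, 5])  # (cx, cy, w, h, angle), CPU or GPU tensors
rbox2 = paddle.rand([64, 5])
iou = rbox_iou(rbox1, rbox2)  # [32, 64] pairwise IoU matrix, dtype follows rbox1
```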
// // The code is based on -// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated +// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/ #include "paddle/extension.h" -#include "rbox_iou_op.h" +#include "rbox_iou_utils.h" // 2D block with 32 * 16 = 512 threads per block const int BLOCK_DIM_X = 32; const int BLOCK_DIM_Y = 16; -/** - Computes ceil(a / b) -*/ - -static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; } - template <typename T> __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num, const T *rbox1_data_ptr, @@ -85,7 +79,7 @@ __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num, } #define CHECK_INPUT_GPU(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.") + PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1, const paddle::Tensor &rbox2) { @@ -95,7 +89,8 @@ std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1, auto rbox1_num = rbox1.shape()[0]; auto rbox2_num = rbox2.shape()[0]; - auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num}); + auto output = + paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::GPUPlace()); const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X); const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y); @@ -107,7 +102,7 @@ std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1, rbox1.type(), "rbox_iou_cuda_kernel", ([&] { rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>( rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(), - output.mutable_data<data_t>()); + output.data<data_t>()); })); return {output}; diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc deleted file mode 100644 index 6031953d20e6302759621ac80b7a3e6ca35928db..0000000000000000000000000000000000000000 --- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
-// -// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated - -#include "rbox_iou_op.h" -#include "paddle/extension.h" - - -template <typename T> -void rbox_iou_cpu_kernel( - const int rbox1_num, - const int rbox2_num, - const T* rbox1_data_ptr, - const T* rbox2_data_ptr, - T* output_data_ptr) { - - int i, j; - for (i = 0; i < rbox1_num; i++) { - for (j = 0; j < rbox2_num; j++) { - int offset = i * rbox2_num + j; - output_data_ptr[offset] = rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5); - } - } -} - - -#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") - -std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) { - CHECK_INPUT_CPU(rbox1); - CHECK_INPUT_CPU(rbox2); - - auto rbox1_num = rbox1.shape()[0]; - auto rbox2_num = rbox2.shape()[0]; - - auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num}); - - PD_DISPATCH_FLOATING_TYPES( - rbox1.type(), - "rbox_iou_cpu_kernel", - ([&] { - rbox_iou_cpu_kernel<data_t>( - rbox1_num, - rbox2_num, - rbox1.data<data_t>(), - rbox2.data<data_t>(), - output.mutable_data<data_t>()); - })); - - return {output}; -} - - -#ifdef PADDLE_WITH_CUDA -std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2); -#endif - - -#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.") - -std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) { - CHECK_INPUT_SAME(rbox1, rbox2); - if (rbox1.place() == paddle::PlaceType::kCPU) { - return RboxIouCPUForward(rbox1, rbox2); -#ifdef PADDLE_WITH_CUDA - } else if (rbox1.place() == paddle::PlaceType::kGPU) { - return RboxIouCUDAForward(rbox1, rbox2); -#endif - } -} - -std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) { - return {{rbox1_shape[0], rbox2_shape[0]}}; -} - -std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) { - return {t1}; -} - -PD_BUILD_OP(rbox_iou) - .Inputs({"RBOX1", "RBOX2"}) - .Outputs({"Output"}) - .SetKernelFn(PD_KERNEL(RboxIouForward)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype)); diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h similarity index 97% rename from ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h rename to ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h index fce66dea00e829215ffdb3a38f8db6182a068609..6f275dd65a7d83962affc92be35fece8348a6a91 100644 --- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h +++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h @@ -13,7 +13,7 @@ // limitations under the License.
// // The code is based on -// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated +// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/ #pragma once @@ -336,13 +336,21 @@ HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw, box2.h = box2_raw[3]; box2.a = box2_raw[4]; - const T area1 = box1.w * box1.h; - const T area2 = box2.w * box2.h; - if (area1 < 1e-14 || area2 < 1e-14) { + if (box1.w < 1e-2 || box1.h < 1e-2 || box2.w < 1e-2 || box2.h < 1e-2) { return 0.f; } + const T area1 = box1.w * box1.h; + const T area2 = box2.w * box2.h; const T intersection = rboxes_intersection(box1, box2); const T iou = intersection / (area1 + area2 - intersection); return iou; } + +/** + Computes ceil(a / b) +*/ + +HOST_DEVICE inline int CeilDiv(const int a, const int b) { + return (a + b - 1) / b; +} \ No newline at end of file diff --git a/ppdet/modeling/assigners/__init__.py b/ppdet/modeling/assigners/__init__.py index ded98c9439cd896c99ca47bc3119d39effad3870..b344f0417e7efb02fefd16bdd3640e36e91ae93e 100644 --- a/ppdet/modeling/assigners/__init__.py +++ b/ppdet/modeling/assigners/__init__.py @@ -18,6 +18,7 @@ from . import atss_assigner from . import simota_assigner from . import max_iou_assigner from . import fcosr_assigner +from . import rotated_task_aligned_assigner from .utils import * from .task_aligned_assigner import * @@ -25,3 +26,4 @@ from .atss_assigner import * from .simota_assigner import * from .max_iou_assigner import * from .fcosr_assigner import * +from .rotated_task_aligned_assigner import * diff --git a/ppdet/modeling/assigners/fcosr_assigner.py b/ppdet/modeling/assigners/fcosr_assigner.py index 84f991023215b344e59c9f6e1e4f7643b3c00dc0..46b743e601ab592cb275a554d4adb4c5a0e05bba 100644 --- a/ppdet/modeling/assigners/fcosr_assigner.py +++ b/ppdet/modeling/assigners/fcosr_assigner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ppdet/modeling/assigners/rotated_task_aligned_assigner.py b/ppdet/modeling/assigners/rotated_task_aligned_assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..eeb9a68b6705fd2cb1c2b51b7d1496a943c1cd79 --- /dev/null +++ b/ppdet/modeling/assigners/rotated_task_aligned_assigner.py @@ -0,0 +1,164 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
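The header change above swaps the old near-zero-area test for a per-side guard: any box thinner than 1e-2 in width or height now yields IoU 0 before the polygon clipping runs. In Python terms (a sketch; `intersection_area` stands in for `rboxes_intersection`, which this diff does not touch):

```python
def rbox_iou_single_py(box1, box2, intersection_area):
    """box*: (cx, cy, w, h, angle); intersection_area: callable standing in
    for rboxes_intersection (polygon clipping, not reproduced here)."""
    # degenerate boxes (any side < 1e-2) are defined to overlap nothing,
    # mirroring the guard added in the hunk above
    if min(box1[2], box1[3], box2[2], box2[3]) < 1e-2:
        return 0.0
    area1 = box1[2] * box1[3]
    area2 = box2[2] * box2[3]
    inter = intersection_area(box1, box2)
    return inter / (area1 + area2 - inter)
```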
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from ppdet.core.workspace import register +from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes +from .utils import gather_topk_anchors, compute_max_iou_anchor + +__all__ = ['RotatedTaskAlignedAssigner'] + + +@register +class RotatedTaskAlignedAssigner(nn.Layer): + """TOOD: Task-aligned One-stage Object Detection + """ + + def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9): + super(RotatedTaskAlignedAssigner, self).__init__() + self.topk = topk + self.alpha = alpha + self.beta = beta + self.eps = eps + + @paddle.no_grad() + def forward(self, + pred_scores, + pred_bboxes, + anchor_points, + num_anchors_list, + gt_labels, + gt_bboxes, + pad_gt_mask, + bg_index, + gt_scores=None): + r"""This code is based on + https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py + + The assignment is done in the following steps + 1. compute the alignment metric between all bboxes (bboxes of all pyramid levels) and gt + 2. select top-k bboxes as candidates for each gt + 3. limit the positive sample's center in gt (because the anchor-free detector + can only predict positive distances) + 4. if an anchor box is assigned to multiple gts, the one with the + highest iou will be selected. + Args: + pred_scores (Tensor, float32): predicted class probability, shape(B, L, C) + pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5) + anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format + num_anchors_list (List): num of anchors in each level, shape(L) + gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1) + gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5) + pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1) + bg_index (int): background index + gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1) + Returns: + assigned_labels (Tensor): (B, L) + assigned_bboxes (Tensor): (B, L, 5) + assigned_scores (Tensor): (B, L, C) + """ + assert pred_scores.ndim == pred_bboxes.ndim + assert gt_labels.ndim == gt_bboxes.ndim and \ + gt_bboxes.ndim == 3 + + batch_size, num_anchors, num_classes = pred_scores.shape + _, num_max_boxes, _ = gt_bboxes.shape + + # negative batch + if num_max_boxes == 0: + assigned_labels = paddle.full( + [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype) + assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5]) + assigned_scores = paddle.zeros( + [batch_size, num_anchors, num_classes]) + return assigned_labels, assigned_bboxes, assigned_scores + + # compute iou between gt and pred bbox, [B, n, L] + ious = rotated_iou_similarity(gt_bboxes, pred_bboxes) + ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious) + ious.stop_gradient = True + # gather pred bboxes class score + pred_scores = pred_scores.transpose([0, 2, 1]) + batch_ind = paddle.arange( + end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1) + gt_labels_ind = paddle.stack( + [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)], + axis=-1) + bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind) + # compute alignment metrics, [B, n, L] + alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow( + self.beta) + + # check the positive sample's center in gt, [B, n, L] + is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes) + + #
select topk largest alignment metrics pred bbox as candidates + # for each gt, [B, n, L] + is_in_topk = gather_topk_anchors( + alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask) + + # select positive sample, [B, n, L] + mask_positive = is_in_topk * is_in_gts * pad_gt_mask + + # if an anchor box is assigned to multiple gts, + # the one with the highest iou will be selected, [B, n, L] + mask_positive_sum = mask_positive.sum(axis=-2) + if mask_positive_sum.max() > 1: + mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile( + [1, num_max_boxes, 1]) + is_max_iou = compute_max_iou_anchor(ious) + mask_positive = paddle.where(mask_multiple_gts, is_max_iou, + mask_positive) + mask_positive_sum = mask_positive.sum(axis=-2) + assigned_gt_index = mask_positive.argmax(axis=-2) + + # assigned target + assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes + assigned_labels = paddle.gather( + gt_labels.flatten(), assigned_gt_index.flatten(), axis=0) + assigned_labels = assigned_labels.reshape([batch_size, num_anchors]) + assigned_labels = paddle.where( + mask_positive_sum > 0, assigned_labels, + paddle.full_like(assigned_labels, bg_index)) + + assigned_bboxes = paddle.gather( + gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0) + assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5]) + + assigned_scores = F.one_hot(assigned_labels, num_classes + 1) + ind = list(range(num_classes + 1)) + ind.remove(bg_index) + assigned_scores = paddle.index_select( + assigned_scores, paddle.to_tensor(ind), axis=-1) + # rescale alignment metrics + alignment_metrics *= mask_positive + max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True) + max_ious_per_instance = (ious * mask_positive).max(axis=-1, + keepdim=True) + alignment_metrics = alignment_metrics / ( + max_metrics_per_instance + self.eps) * max_ious_per_instance + alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1) + assigned_scores = assigned_scores * alignment_metrics + + assigned_bboxes.stop_gradient = True + assigned_scores.stop_gradient = True + assigned_labels.stop_gradient = True + return assigned_labels, assigned_bboxes, assigned_scores diff --git a/ppdet/modeling/heads/__init__.py b/ppdet/modeling/heads/__init__.py index 85c6b47bf9f22a3e16458f6bb2969d7bfd111354..1e7a6b97cb500114fea12ff6c2d9d4cdacfb4bdf 100644 --- a/ppdet/modeling/heads/__init__.py +++ b/ppdet/modeling/heads/__init__.py @@ -34,6 +34,7 @@ from . import tood_head from . import retina_head from . import ppyoloe_head from . import fcosr_head +from . import ppyoloe_r_head from . 
import ld_gfl_head from .bbox_head import * @@ -59,3 +60,4 @@ from .retina_head import * from .ppyoloe_head import * from .fcosr_head import * from .ld_gfl_head import * +from .ppyoloe_r_head import * diff --git a/ppdet/modeling/heads/fcosr_head.py b/ppdet/modeling/heads/fcosr_head.py index 06b84440e8eb1f8e252eaf2c723bbc03bb4ced0a..97cd949d70bf67f9344b10d680dc9fd649960912 100644 --- a/ppdet/modeling/heads/fcosr_head.py +++ b/ppdet/modeling/heads/fcosr_head.py @@ -205,8 +205,8 @@ class FCOSRHead(nn.Layer): anchor_points = [] stride_tensor = [] num_anchors_list = [] - for i, stride in enumerate(self.fpn_strides): - _, _, h, w = feats[i].shape + for feat, stride in zip(feats, self.fpn_strides): + _, _, h, w = paddle.shape(feat) shift_x = (paddle.arange(end=w) + 0.5) * stride shift_y = (paddle.arange(end=h) + 0.5) * stride shift_y, shift_x = paddle.meshgrid(shift_y, shift_x) diff --git a/ppdet/modeling/heads/ppyoloe_r_head.py b/ppdet/modeling/heads/ppyoloe_r_head.py new file mode 100644 index 0000000000000000000000000000000000000000..89cb0fa82a07085154665e45680460551e46fb8b --- /dev/null +++ b/ppdet/modeling/heads/ppyoloe_r_head.py @@ -0,0 +1,419 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
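The rotated assigner above keeps TOOD's task-alignment metric t = s^alpha * u^beta and only swaps the IoU term for rotated IoU. A self-contained toy of that metric and the per-gt top-k candidate selection, with random stand-ins for the gathered class scores and rotated IoUs:

```python
import paddle

# defaults from RotatedTaskAlignedAssigner.__init__ above
alpha, beta, topk = 1.0, 6.0, 13

scores = paddle.rand([2, 3, 100])   # [B, n, L] class score of each (gt, anchor) pair
ious = paddle.rand([2, 3, 100])     # [B, n, L] rotated IoU of each pair
alignment = scores.pow(alpha) * ious.pow(beta)  # t = s^alpha * u^beta

# per gt, keep the top-k anchors by alignment metric as positive candidates
topk_metrics, topk_idx = paddle.topk(alignment, topk, axis=-1)
```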
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from ppdet.core.workspace import register + +from ..losses import ProbIoULoss +from ..initializer import bias_init_with_prob, constant_, normal_, vector_ +from ppdet.modeling.backbones.cspresnet import ConvBNLayer +from ppdet.modeling.ops import get_static_shape, get_act_fn, anchor_generator +from ppdet.modeling.layers import MultiClassNMS + +__all__ = ['PPYOLOERHead'] + + +class ESEAttn(nn.Layer): + def __init__(self, feat_channels, act='swish'): + super(ESEAttn, self).__init__() + self.fc = nn.Conv2D(feat_channels, feat_channels, 1) + self.conv = ConvBNLayer(feat_channels, feat_channels, 1, act=act) + + self._init_weights() + + def _init_weights(self): + normal_(self.fc.weight, std=0.01) + + def forward(self, feat, avg_feat): + weight = F.sigmoid(self.fc(avg_feat)) + return self.conv(feat * weight) + + +@register +class PPYOLOERHead(nn.Layer): + __shared__ = ['num_classes', 'trt'] + __inject__ = ['static_assigner', 'assigner', 'nms'] + + def __init__(self, + in_channels=[1024, 512, 256], + num_classes=15, + act='swish', + fpn_strides=(32, 16, 8), + grid_cell_offset=0.5, + angle_max=90, + use_varifocal_loss=True, + static_assigner_epoch=4, + trt=False, + static_assigner='ATSSAssigner', + assigner='TaskAlignedAssigner', + nms='MultiClassNMS', + loss_weight={'class': 1.0, + 'iou': 2.5, + 'dfl': 0.05}): + super(PPYOLOERHead, self).__init__() + assert len(in_channels) > 0, "len(in_channels) should > 0" + self.in_channels = in_channels + self.num_classes = num_classes + self.fpn_strides = fpn_strides + self.grid_cell_offset = grid_cell_offset + self.angle_max = angle_max + self.loss_weight = loss_weight + self.use_varifocal_loss = use_varifocal_loss + self.half_pi = paddle.to_tensor( + [1.5707963267948966], dtype=paddle.float32) + self.half_pi_bin = self.half_pi / angle_max + self.iou_loss = ProbIoULoss() + self.static_assigner_epoch = static_assigner_epoch + self.static_assigner = static_assigner + self.assigner = assigner + self.nms = nms + # stem + self.stem_cls = nn.LayerList() + self.stem_reg = nn.LayerList() + self.stem_angle = nn.LayerList() + act = get_act_fn( + act, trt=trt) if act is None or isinstance(act, + (str, dict)) else act + self.trt = trt + for in_c in self.in_channels: + self.stem_cls.append(ESEAttn(in_c, act=act)) + self.stem_reg.append(ESEAttn(in_c, act=act)) + self.stem_angle.append(ESEAttn(in_c, act=act)) + # pred head + self.pred_cls = nn.LayerList() + self.pred_reg = nn.LayerList() + self.pred_angle = nn.LayerList() + for in_c in self.in_channels: + self.pred_cls.append( + nn.Conv2D( + in_c, self.num_classes, 3, padding=1)) + self.pred_reg.append(nn.Conv2D(in_c, 4, 3, padding=1)) + self.pred_angle.append( + nn.Conv2D( + in_c, self.angle_max + 1, 3, padding=1)) + self.angle_proj_conv = nn.Conv2D( + self.angle_max + 1, 1, 1, bias_attr=False) + self._init_weights() + + @classmethod + def from_config(cls, cfg, input_shape): + return {'in_channels': [i.channels for i in input_shape], } + + def _init_weights(self): + bias_cls = bias_init_with_prob(0.01) + bias_angle = [10.] + [1.] 
* self.angle_max + for cls_, reg_, angle_ in zip(self.pred_cls, self.pred_reg, + self.pred_angle): + normal_(cls_.weight, std=0.01) + constant_(cls_.bias, bias_cls) + normal_(reg_.weight, std=0.01) + constant_(reg_.bias) + constant_(angle_.weight) + vector_(angle_.bias, bias_angle) + + angle_proj = paddle.linspace(0, self.angle_max, self.angle_max + 1) + self.angle_proj = angle_proj * self.half_pi_bin + self.angle_proj_conv.weight.set_value( + self.angle_proj.reshape([1, self.angle_max + 1, 1, 1])) + self.angle_proj_conv.weight.stop_gradient = True + + def _generate_anchors(self, feats): + if self.trt: + anchor_points = [] + for feat, stride in zip(feats, self.fpn_strides): + _, _, h, w = paddle.shape(feat) + anchor, _ = anchor_generator( + feat, + stride * 4, + 1.0, [1.0, 1.0, 1.0, 1.0], [stride, stride], + offset=0.5) + x1, y1, x2, y2 = paddle.split(anchor, 4, axis=-1) + xc = (x1 + x2 + 1) / 2 + yc = (y1 + y2 + 1) / 2 + anchor_point = paddle.concat( + [xc, yc], axis=-1).reshape((1, h * w, 2)) + anchor_points.append(anchor_point) + anchor_points = paddle.concat(anchor_points, axis=1) + return anchor_points, None, None + else: + anchor_points = [] + stride_tensor = [] + num_anchors_list = [] + for feat, stride in zip(feats, self.fpn_strides): + _, _, h, w = paddle.shape(feat) + shift_x = (paddle.arange(end=w) + 0.5) * stride + shift_y = (paddle.arange(end=h) + 0.5) * stride + shift_y, shift_x = paddle.meshgrid(shift_y, shift_x) + anchor_point = paddle.cast( + paddle.stack( + [shift_x, shift_y], axis=-1), dtype='float32') + anchor_points.append(anchor_point.reshape([1, -1, 2])) + stride_tensor.append( + paddle.full( + [1, h * w, 1], stride, dtype='float32')) + num_anchors_list.append(h * w) + anchor_points = paddle.concat(anchor_points, axis=1) + stride_tensor = paddle.concat(stride_tensor, axis=1) + return anchor_points, stride_tensor, num_anchors_list + + def forward(self, feats, targets=None): + assert len(feats) == len(self.fpn_strides), \ + "The size of feats is not equal to size of fpn_strides" + + if self.training: + return self.forward_train(feats, targets) + else: + return self.forward_eval(feats) + + def forward_train(self, feats, targets): + anchor_points, stride_tensor, num_anchors_list = self._generate_anchors( + feats) + + cls_score_list, reg_dist_list, reg_angle_list = [], [], [] + for i, feat in enumerate(feats): + avg_feat = F.adaptive_avg_pool2d(feat, (1, 1)) + cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) + + feat) + reg_dist = self.pred_reg[i](self.stem_reg[i](feat, avg_feat)) + reg_angle = self.pred_angle[i](self.stem_angle[i](feat, avg_feat)) + # cls and reg + cls_score = F.sigmoid(cls_logit) + cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1])) + reg_dist_list.append(reg_dist.flatten(2).transpose([0, 2, 1])) + reg_angle_list.append(reg_angle.flatten(2).transpose([0, 2, 1])) + cls_score_list = paddle.concat(cls_score_list, axis=1) + reg_dist_list = paddle.concat(reg_dist_list, axis=1) + reg_angle_list = paddle.concat(reg_angle_list, axis=1) + + return self.get_loss([ + cls_score_list, reg_dist_list, reg_angle_list, anchor_points, + num_anchors_list, stride_tensor + ], targets) + + def forward_eval(self, feats): + cls_score_list, reg_box_list = [], [] + anchor_points, _, _ = self._generate_anchors(feats) + for i, (feat, stride) in enumerate(zip(feats, self.fpn_strides)): + b, _, h, w = paddle.shape(feat) + l = h * w + # cls + avg_feat = F.adaptive_avg_pool2d(feat, (1, 1)) + cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) + + 
feat) + # reg + reg_dist = self.pred_reg[i](self.stem_reg[i](feat, avg_feat)) + reg_xy, reg_wh = paddle.split(reg_dist, 2, axis=1) + reg_xy = reg_xy * stride + reg_wh = (F.elu(reg_wh) + 1.) * stride + reg_angle = self.pred_angle[i](self.stem_angle[i](feat, avg_feat)) + reg_angle = self.angle_proj_conv(F.softmax(reg_angle, axis=1)) + reg_box = paddle.concat([reg_xy, reg_wh, reg_angle], axis=1) + # cls and reg + cls_score = F.sigmoid(cls_logit) + cls_score_list.append(cls_score.reshape([b, self.num_classes, l])) + reg_box_list.append(reg_box.reshape([b, 5, l])) + + cls_score_list = paddle.concat(cls_score_list, axis=-1) + reg_box_list = paddle.concat(reg_box_list, axis=-1).transpose([0, 2, 1]) + reg_xy, reg_wha = paddle.split(reg_box_list, [2, 3], axis=-1) + reg_xy = reg_xy + anchor_points + reg_box_list = paddle.concat([reg_xy, reg_wha], axis=-1) + return cls_score_list, reg_box_list + + def _bbox_decode(self, points, pred_dist, pred_angle, stride_tensor): + # decode predicted vectors to (x, y, w, h, angle) + b, l = pred_angle.shape[:2] + xy, wh = paddle.split(pred_dist, 2, axis=-1) + xy = xy * stride_tensor + points + wh = (F.elu(wh) + 1.) * stride_tensor + angle = F.softmax(pred_angle.reshape([b, l, 1, self.angle_max + 1 + ])).matmul(self.angle_proj) + return paddle.concat([xy, wh, angle], axis=-1) + + def get_loss(self, head_outs, gt_meta): + pred_scores, pred_dist, pred_angle, \ + anchor_points, num_anchors_list, stride_tensor = head_outs + # decode pred_dist and pred_angle into [B, N, 5] rboxes + pred_bboxes = self._bbox_decode(anchor_points, pred_dist, pred_angle, + stride_tensor) + gt_labels = gt_meta['gt_class'] + # [B, N, 5] + gt_bboxes = gt_meta['gt_rbox'] + pad_gt_mask = gt_meta['pad_gt_mask'] + # label assignment + if gt_meta['epoch_id'] < self.static_assigner_epoch: + assigned_labels, assigned_bboxes, assigned_scores = \ + self.static_assigner( + anchor_points, + stride_tensor, + num_anchors_list, + gt_labels, + gt_meta['gt_bbox'], + gt_bboxes, + pad_gt_mask, + self.num_classes, + pred_bboxes.detach() + ) + else: + assigned_labels, assigned_bboxes, assigned_scores = \ + self.assigner( + pred_scores.detach(), + pred_bboxes.detach(), + anchor_points, + num_anchors_list, + gt_labels, + gt_bboxes, + pad_gt_mask, + bg_index=self.num_classes) + alpha_l = -1 + # cls loss + if self.use_varifocal_loss: + one_hot_label = F.one_hot(assigned_labels, + self.num_classes + 1)[..., :-1] + loss_cls = self._varifocal_loss(pred_scores, assigned_scores, + one_hot_label) + else: + loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l) + + assigned_scores_sum = assigned_scores.sum() + if paddle.distributed.get_world_size() > 1: + paddle.distributed.all_reduce(assigned_scores_sum) + assigned_scores_sum = paddle.clip( + assigned_scores_sum / paddle.distributed.get_world_size(), + min=1.) + else: + assigned_scores_sum = paddle.clip(assigned_scores_sum, min=1.)
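An aside on the angle branch used by `_bbox_decode` above: the head predicts `angle_max + 1` DFL-style bins spanning [0, pi/2], and the expected angle is recovered by a softmax-weighted projection, which is also what the frozen 1x1 `angle_proj_conv` computes at eval time. A self-contained sketch:

```python
import math
import paddle
import paddle.nn.functional as F

angle_max = 90                       # default from PPYOLOERHead.__init__
half_pi_bin = (math.pi / 2) / angle_max
# fixed projection: bin index -> angle in radians, mirroring self.angle_proj
angle_proj = paddle.linspace(0, angle_max, angle_max + 1) * half_pi_bin

pred_angle = paddle.randn([2, 100, angle_max + 1])         # [B, L, bins] logits
angle = F.softmax(pred_angle, axis=-1).matmul(angle_proj)  # [B, L], in [0, pi/2]
```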
+ loss_cls /= assigned_scores_sum + + loss_iou, loss_dfl = self._bbox_loss(pred_angle, pred_bboxes, + anchor_points, assigned_labels, + assigned_bboxes, assigned_scores, + assigned_scores_sum, stride_tensor) + + loss = self.loss_weight['class'] * loss_cls + \ + self.loss_weight['iou'] * loss_iou + \ + self.loss_weight['dfl'] * loss_dfl + out_dict = { + 'loss': loss, + 'loss_cls': loss_cls, + 'loss_iou': loss_iou, + 'loss_dfl': loss_dfl + } + return out_dict + + @staticmethod + def _focal_loss(score, label, alpha=0.25, gamma=2.0): + weight = (score - label).pow(gamma) + if alpha > 0: + alpha_t = alpha * label + (1 - alpha) * (1 - label) + weight *= alpha_t + loss = F.binary_cross_entropy( + score, label, weight=weight, reduction='sum') + return loss + + @staticmethod + def _varifocal_loss(pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label + loss = F.binary_cross_entropy( + pred_score, gt_score, weight=weight, reduction='sum') + return loss + + @staticmethod + def _df_loss(pred_dist, target): + target_left = paddle.cast(target, 'int64') + target_right = target_left + 1 + weight_left = target_right.astype('float32') - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy( + pred_dist, target_left, reduction='none') * weight_left + loss_right = F.cross_entropy( + pred_dist, target_right, reduction='none') * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + def _bbox_loss(self, pred_angle, pred_bboxes, anchor_points, + assigned_labels, assigned_bboxes, assigned_scores, + assigned_scores_sum, stride_tensor): + # select positive samples mask + mask_positive = (assigned_labels != self.num_classes) + num_pos = mask_positive.sum() + # pos/neg loss + if num_pos > 0: + # iou + bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 5]) + pred_bboxes_pos = paddle.masked_select(pred_bboxes, + bbox_mask).reshape([-1, 5]) + assigned_bboxes_pos = paddle.masked_select( + assigned_bboxes, bbox_mask).reshape([-1, 5]) + bbox_weight = paddle.masked_select( + assigned_scores.sum(-1), mask_positive).reshape([-1]) + + loss_iou = self.iou_loss(pred_bboxes_pos, + assigned_bboxes_pos) * bbox_weight + loss_iou = loss_iou.sum() / assigned_scores_sum + + # dfl + angle_mask = mask_positive.unsqueeze(-1).tile( + [1, 1, self.angle_max + 1]) + pred_angle_pos = paddle.masked_select( + pred_angle, angle_mask).reshape([-1, self.angle_max + 1]) + assigned_angle_pos = ( + assigned_bboxes_pos[:, 4] / + self.half_pi_bin).clip(0, self.angle_max - 0.01) + loss_dfl = self._df_loss(pred_angle_pos, assigned_angle_pos) + else: + loss_iou = pred_bboxes.sum() * 0. 
+ loss_dfl = paddle.zeros([1]) + + return loss_iou, loss_dfl + + def _box2corners(self, pred_bboxes): + """ convert (x, y, w, h, angle) to (x1, y1, x2, y2, x3, y3, x4, y4) + + Args: + pred_bboxes (Tensor): [B, N, 5] + + Returns: + polys (Tensor): [B, N, 8] + """ + x, y, w, h, angle = paddle.split(pred_bboxes, 5, axis=-1) + cos_a_half = paddle.cos(angle) * 0.5 + sin_a_half = paddle.sin(angle) * 0.5 + w_x = cos_a_half * w + w_y = sin_a_half * w + h_x = -sin_a_half * h + h_y = cos_a_half * h + return paddle.concat( + [ + x + w_x + h_x, y + w_y + h_y, x - w_x + h_x, y - w_y + h_y, + x - w_x - h_x, y - w_y - h_y, x + w_x - h_x, y + w_y - h_y + ], + axis=-1) + + def post_process(self, head_outs, scale_factor): + pred_scores, pred_bboxes = head_outs + # [B, N, 5] -> [B, N, 8] + pred_bboxes = self._box2corners(pred_bboxes) + # scale bbox to origin + scale_y, scale_x = paddle.split(scale_factor, 2, axis=-1) + scale_factor = paddle.concat( + [ + scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x, + scale_y + ], + axis=-1).reshape([-1, 1, 8]) + pred_bboxes /= scale_factor + bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores) + return bbox_pred, bbox_num diff --git a/ppdet/modeling/initializer.py b/ppdet/modeling/initializer.py index b482f133dd9ac1e2568f5c971f004117c56a5368..758eed240eae4497e14b7fe1cb9e10aca702eb53 100644 --- a/ppdet/modeling/initializer.py +++ b/ppdet/modeling/initializer.py @@ -118,6 +118,12 @@ def zeros_(tensor): return _no_grad_fill_(tensor, 0) +def vector_(tensor, vector): + with paddle.no_grad(): + tensor.set_value(paddle.to_tensor(vector, dtype=tensor.dtype)) + return tensor + + def _calculate_fan_in_and_fan_out(tensor, reverse=False): """ Calculate (fan_in, _fan_out) for tensor diff --git a/ppdet/modeling/necks/custom_pan.py b/ppdet/modeling/necks/custom_pan.py index 08de226de5bed74d7acb2ca62b2b74599915482a..bb7123c66ff854eb5712bb1bfee254e8a8677b00 100644 --- a/ppdet/modeling/necks/custom_pan.py +++ b/ppdet/modeling/necks/custom_pan.py @@ -61,7 +61,14 @@ class SPP(nn.Layer): class CSPStage(nn.Layer): - def __init__(self, block_fn, ch_in, ch_out, n, act='swish', spp=False): + def __init__(self, + block_fn, + ch_in, + ch_out, + n, + act='swish', + spp=False, + use_alpha=False): super(CSPStage, self).__init__() ch_mid = int(ch_out // 2) @@ -72,7 +79,11 @@ class CSPStage(nn.Layer): for i in range(n): self.convs.add_sublayer( str(i), - eval(block_fn)(next_ch_in, ch_mid, act=act, shortcut=False)) + eval(block_fn)(next_ch_in, + ch_mid, + act=act, + shortcut=False, + use_alpha=use_alpha)) if i == (n - 1) // 2 and spp: self.convs.add_sublayer( 'spp', SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13], act=act)) @@ -109,6 +120,7 @@ class CustomCSPPAN(nn.Layer): data_format='NCHW', width_mult=1.0, depth_mult=1.0, + use_alpha=False, trt=False): super(CustomCSPPAN, self).__init__() @@ -136,7 +148,8 @@ class CustomCSPPAN(nn.Layer): ch_out, block_num, act=act, - spp=(spp and i == 0))) + spp=(spp and i == 0), + use_alpha=use_alpha)) if drop_block: stage.add_sublayer('drop', DropBlock(block_size, keep_prob)) @@ -181,7 +194,8 @@ class CustomCSPPAN(nn.Layer): ch_out, block_num, act=act, - spp=False)) + spp=False, + use_alpha=use_alpha)) if drop_block: stage.add_sublayer('drop', DropBlock(block_size, keep_prob)) diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index fb9d98cf0f35458eb2af063487b7664a3fd8c2cc..d9a1192d7fb93ef855d06cf8fbebd688e21a7317 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -26,18 +26,9 @@ from paddle import in_dynamic_mode from 
paddle.common_ops_import import Variable, LayerHelper, check_variable_and_dtype, check_type, check_dtype __all__ = [ - 'prior_box', - 'generate_proposals', - 'box_coder', - 'multiclass_nms', - 'distribute_fpn_proposals', - 'matrix_nms', - 'batch_norm', - 'mish', - 'silu', - 'swish', - 'identity', - 'anchor_generator' + 'prior_box', 'generate_proposals', 'box_coder', 'multiclass_nms', + 'distribute_fpn_proposals', 'matrix_nms', 'batch_norm', 'mish', 'silu', + 'swish', 'identity', 'anchor_generator' ] @@ -118,6 +109,7 @@ def batch_norm(ch, return norm_layer + @paddle.jit.not_to_static def anchor_generator(input, anchor_sizes=None, diff --git a/ppdet/modeling/rbox_utils.py b/ppdet/modeling/rbox_utils.py index bde5320cb74ed85451b17a84016f314ac07398a7..a5f19a2949d9f46b05ff94e5534807dabc46600d 100644 --- a/ppdet/modeling/rbox_utils.py +++ b/ppdet/modeling/rbox_utils.py @@ -239,3 +239,57 @@ def check_points_in_polys(points, polys): is_in_polys = (ap_dot_ab >= 0) & (ap_dot_ab <= norm_ab) & ( ap_dot_ad >= 0) & (ap_dot_ad <= norm_ad) return is_in_polys + + +def check_points_in_rotated_boxes(points, boxes): + """Check whether points are in rotated boxes + + Args: + points (tensor): (1, L, 2) anchor points + boxes (tensor): [B, N, 5] gt_bboxes + + Returns: + is_in_box (tensor): (B, N, L) + + """ + # [B, N, 5] -> [B, N, 4, 2] + corners = box2corners(boxes) + # [1, L, 2] -> [1, 1, L, 2] + points = points.unsqueeze(0) + # [B, N, 4, 2] -> [B, N, 1, 2] + a, b, c, d = corners.split(4, axis=2) + ab = b - a + ad = d - a + # [B, N, L, 2] + ap = points - a + # [B, N, L] + norm_ab = paddle.sum(ab * ab, axis=-1) + # [B, N, L] + norm_ad = paddle.sum(ad * ad, axis=-1) + # [B, N, L] dot product + ap_dot_ab = paddle.sum(ap * ab, axis=-1) + # [B, N, L] dot product + ap_dot_ad = paddle.sum(ap * ad, axis=-1) + # [B, N, L] = |A|*|B|*cos(theta) + is_in_box = (ap_dot_ab >= 0) & (ap_dot_ab <= norm_ab) & (ap_dot_ad >= 0) & ( + ap_dot_ad <= norm_ad) + return is_in_box + + +def rotated_iou_similarity(box1, box2, eps=1e-9, func=''): + """Calculate IoU of box1 and box2 + + Args: + box1 (Tensor): box with the shape [N, M1, 5] + box2 (Tensor): box with the shape [N, M2, 5] + + Return: + iou (Tensor): iou between box1 and box2 with the shape [N, M1, M2] + """ + from ext_op import rbox_iou + rotated_ious = [] + for b1, b2 in zip(box1, box2): + rotated_ious.append(rbox_iou(b1, b2)) + + return paddle.stack(rotated_ious, axis=0)
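To close the loop, this is how the two helpers added to rbox_utils.py come together at assignment time (a usage sketch on random inputs; `rotated_iou_similarity` requires the ext_op package built from the C++ sources above, and note that it loops over the batch, issuing one `rbox_iou` call per image):

```python
import paddle
from ppdet.modeling.rbox_utils import (check_points_in_rotated_boxes,
                                       rotated_iou_similarity)

points = paddle.rand([1, 100, 2])   # (1, L, 2) anchor centers
gt = paddle.rand([2, 3, 5])         # [B, N, 5] rotated gt boxes
pred = paddle.rand([2, 100, 5])     # [B, L, 5] decoded predictions

inside = check_points_in_rotated_boxes(points, gt)  # [B, N, L] membership mask
ious = rotated_iou_similarity(gt, pred)             # [B, N, L] rotated IoUs
```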