diff --git a/configs/datasets/dota_ms.yml b/configs/datasets/dota_ms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..802e8846d7f443a7032cf49a88bfe79328ea41db
--- /dev/null
+++ b/configs/datasets/dota_ms.yml
@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 15
+
+TrainDataset:
+ !COCODataSet
+ image_dir: trainval1024/images
+ anno_path: trainval1024/DOTA_trainval1024.json
+ dataset_dir: dataset/dota_ms/
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+ !COCODataSet
+ image_dir: trainval1024/images
+ anno_path: trainval1024/DOTA_trainval1024.json
+ dataset_dir: dataset/dota_ms/
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+ !ImageFolder
+ anno_path: test1024/DOTA_test1024.json
+ dataset_dir: dataset/dota_ms/
diff --git a/configs/rotate/README.md b/configs/rotate/README.md
index 574cb4ed5ece2992b7d04587bac977ba19f0d5a1..5558c4a873386687f7761d1481dfcab8eb07dc3e 100644
--- a/configs/rotate/README.md
+++ b/configs/rotate/README.md
@@ -16,7 +16,15 @@
| 模型 | mAP | 学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 |
|:---:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
| [S2ANet](./s2anet/README.md) | 73.84 | 2x | le135 | - | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/s2anet/s2anet_alignconv_2x_dota.yml) |
-| [FCOSR](./fcosr/README.md) | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| [FCOSR](./fcosr/README.md) | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| [PP-YOLOE-R-s](./ppyoloe_r/README.md) | 73.82 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| [PP-YOLOE-R-s](./ppyoloe_r/README.md) | 79.42 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| [PP-YOLOE-R-m](./ppyoloe_r/README.md) | 77.64 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| [PP-YOLOE-R-m](./ppyoloe_r/README.md) | 79.71 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| [PP-YOLOE-R-l](./ppyoloe_r/README.md) | 78.14 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| [PP-YOLOE-R-l](./ppyoloe_r/README.md) | 80.02 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README.md) | 78.28 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README.md) | 80.73 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
**注意:**
diff --git a/configs/rotate/README_en.md b/configs/rotate/README_en.md
index ef5160ec9f4f0b8f8670a7a0989a05b2be5b982d..fc746ae1283b3f04ebc2592025318c9a34e6fd4e 100644
--- a/configs/rotate/README_en.md
+++ b/configs/rotate/README_en.md
@@ -15,7 +15,15 @@ Rotated object detection is used to detect rectangular bounding boxes with angle
| Model | mAP | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config |
|:---:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
| [S2ANet](./s2anet/README_en.md) | 73.84 | 2x | le135 | - | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/s2anet/s2anet_alignconv_2x_dota.yml) |
-| [FCOSR](./fcosr/README_en.md) | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| [FCOSR](./fcosr/README_en.md) | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| [PP-YOLOE-R-s](./ppyoloe_r/README_en.md) | 73.82 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| [PP-YOLOE-R-s](./ppyoloe_r/README_en.md) | 79.42 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| [PP-YOLOE-R-m](./ppyoloe_r/README_en.md) | 77.64 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| [PP-YOLOE-R-m](./ppyoloe_r/README_en.md) | 79.71 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| [PP-YOLOE-R-l](./ppyoloe_r/README_en.md) | 78.14 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| [PP-YOLOE-R-l](./ppyoloe_r/README_en.md) | 80.02 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README_en.md) | 78.28 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| [PP-YOLOE-R-x](./ppyoloe_r/README_en.md) | 80.73 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
**Notes:**
diff --git a/configs/rotate/fcosr/README.md b/configs/rotate/fcosr/README.md
index 0113ee1f8d6a9796a8bb91c02787308dd8bbac48..19888eea5a4f60faf611824b4cf4bf2e569b59b9 100644
--- a/configs/rotate/fcosr/README.md
+++ b/configs/rotate/fcosr/README.md
@@ -17,7 +17,7 @@
| 模型 | Backbone | mAP | 学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 |
|:---:|:--------:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
-| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
**注意:**
diff --git a/configs/rotate/fcosr/README_en.md b/configs/rotate/fcosr/README_en.md
index cf8e49ae47ad2d12badfd5ddfa89cbb3bc3eabe1..ee16d0edb1ca0c312757a5c0402a180c3e502bd2 100644
--- a/configs/rotate/fcosr/README_en.md
+++ b/configs/rotate/fcosr/README_en.md
@@ -17,7 +17,7 @@ English | [简体中文](README.md)
| Model | Backbone | mAP | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config |
|:---:|:--------:|:----:|:---------:|:-----:|:--------:|:-----:|:------------:|:-------:|:------:|
-| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | - | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
+| FCOSR-M | ResNeXt-50 | 76.62 | 3x | oc | RR | 4 | 4 | [model](https://paddledet.bj.bcebos.com/models/fcosr_x50_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/fcosr/fcosr_x50_3x_dota.yml) |
**Notes:**
diff --git a/configs/rotate/ppyoloe_r/README.md b/configs/rotate/ppyoloe_r/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0892942ee3c2a11132580c5a24eaf3d866b7b16c
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/README.md
@@ -0,0 +1,147 @@
+简体中文 | [English](README_en.md)
+
+# PP-YOLOE-R
+
+## 内容
+- [简介](#简介)
+- [模型库](#模型库)
+- [使用说明](#使用说明)
+- [预测部署](#预测部署)
+- [附录](#附录)
+- [引用](#引用)
+
+## 简介
+PP-YOLOE-R是一个高效的单阶段Anchor-free旋转框检测模型。基于PP-YOLOE, PP-YOLOE-R以极少的参数量和计算量为代价,引入了一系列有用的设计来提升检测精度。在DOTA 1.0数据集上,PP-YOLOE-R-l和PP-YOLOE-R-x在单尺度训练和测试的情况下分别达到了78.14和78.28 mAP,这超越了几乎所有的旋转框检测模型。通过多尺度训练和测试,PP-YOLOE-R-l和PP-YOLOE-R-x的检测精度进一步提升至80.02和80.73 mAP。在这种情况下,PP-YOLOE-R-x超越了所有的anchor-free方法并且和最先进的anchor-based的两阶段模型精度几乎相当。此外,PP-YOLOE-R-s和PP-YOLOE-R-m通过多尺度训练和测试可以达到79.42和79.71 mAP。考虑到这两个模型的参数量和计算量,其性能也非常卓越。在保持高精度的同时,PP-YOLOE-R避免使用特殊的算子,例如Deformable Convolution或Rotated RoI Align,以使其能轻松地部署在多种多样的硬件上。在1024x1024的输入分辨率下,PP-YOLOE-R-s/m/l/x在RTX 2080 Ti上使用TensorRT FP16分别能达到69.8/55.1/48.3/37.1 FPS,在Tesla V100上分别能达到114.5/86.8/69.7/50.7 FPS。更多细节可以参考我们的技术报告。
+
+
+
+
+
+PP-YOLOE-R相较于PP-YOLOE做了以下几点改动:
+- Rotated Task Alignment Learning
+- 解耦的角度预测头
+- 使用DFL进行角度预测
+- 可学习的门控单元
+- [ProbIoU损失函数](https://arxiv.org/abs/2106.06072)
+
+## 模型库
+
+| 模型 | Backbone | mAP | V100 TRT FP16 (FPS) | RTX 2080 Ti TRT FP16 (FPS) |学习率策略 | 角度表示 | 数据增广 | GPU数目 | 每GPU图片数目 | 模型下载 | 配置文件 |
+|:---:|:--------:|:----:|:--------------------:|:------------:|:--------------------:|:-----:|:--------:|:-------:|:------:|:-----------:|:------:|
+| PP-YOLOE-R-s | CRN-s | 73.82 | 114.5 | 69.8 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| PP-YOLOE-R-s | CRN-s | 79.42 | 114.5 | 69.8 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| PP-YOLOE-R-m | CRN-m | 77.64 | 86.8 | 55.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| PP-YOLOE-R-m | CRN-m | 79.71 | 86.8 | 55.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| PP-YOLOE-R-l | CRN-l | 78.14 | 69.7 | 48.3 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| PP-YOLOE-R-l | CRN-l | 80.02 | 69.7 | 48.3 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| PP-YOLOE-R-x | CRN-x | 78.28 | 50.7 | 37.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| PP-YOLOE-R-x | CRN-x | 80.73 | 50.7 | 37.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
+
+**注意:**
+
+- 如果**GPU卡数**或者**batch size**发生了改变,你需要按照公式 **lrnew = lrdefault * (batch_sizenew * GPU_numbernew) / (batch_sizedefault * GPU_numberdefault)** 调整学习率。
+- 模型库中的模型默认使用单尺度训练单尺度测试。如果数据增广一栏标明MS,意味着使用多尺度训练和多尺度测试。如果数据增广一栏标明RR,意味着使用RandomRotate数据增广进行训练。
+- CRN表示在PP-YOLOE中提出的CSPRepResNet
+- 速度测试使用TensorRT 8.2.3在DOTA测试集中测试2000张图片计算平均值得到。参考[速度测试](#速度测试)一节以复现速度测试结果。
+
+## 使用说明
+
+参考[数据准备](../README.md#数据准备)准备数据。
+
+### 训练
+
+GPU单卡训练
+``` bash
+CUDA_VISIBLE_DEVICES=0 python tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+GPU多卡训练
+``` bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+### 预测
+
+执行以下命令预测单张图片,图片预测结果会默认保存在`output`文件夹下面
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_img=demo/P0861__1.0__1154___824.png --draw_threshold=0.5
+```
+
+### DOTA数据集评估
+
+参考[DOTA Task](https://captain-whu.github.io/DOTA/tasks.html), 评估DOTA数据集需要生成一个包含所有检测结果的zip文件,每一类的检测结果储存在一个txt文件中,txt文件中每行格式为:`image_name score x1 y1 x2 y2 x3 y3 x4 y4`。将生成的zip文件提交到[DOTA Evaluation](https://captain-whu.github.io/DOTA/evaluation.html)的Task1进行评估。你可以执行以下命令得到test数据集的预测结果:
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_dir=/path/to/test/images --output_dir=output_ppyoloe_r --visualize=False --save_results=True
+```
+将预测结果处理成官网评估所需要的格式:
+``` bash
+python configs/rotate/tools/generate_result.py --pred_txt_dir=output_ppyoloe_r/ --output_dir=submit/ --data_type=dota10
+
+zip -r submit.zip submit
+```
+
+### 速度测试
+速度测试需要确保**TensorRT版本大于8.2, PaddlePaddle版本大于2.4.0rc0**。使用Paddle Inference且使用TensorRT进行测速,执行以下命令:
+
+``` bash
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# 速度测试
+CUDA_VISIBLE_DEVICES=0 python configs/rotate/tools/inference_benchmark.py --model_dir output_inference/ppyoloe_r_crn_l_3x_dota/ --image_dir /path/to/dota/test/dir --run_mode trt_fp16
+```
+
+## 预测部署
+
+**使用Paddle Inference但不使用TensorRT**进行部署,执行以下命令:
+``` bash
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams
+
+# 预测图片
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=paddle --device=gpu
+```
+
+**使用Paddle Inference且使用TensorRT**进行部署,执行以下命令:
+```
+# 导出模型
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# 预测图片
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=trt_fp16 --device=gpu
+```
+
+**注意:**
+- 使用Paddle-TRT时请确保PaddlePaddle版本大于2.4.0rc且TensorRT版本大于8.2。
+
+
+## 附录
+
+PP-YOLOE-R消融实验
+
+| 模型 | mAP | 参数量(M) | FLOPs(G) |
+| :-: | :-: | :------: | :------: |
+| Baseline | 75.61 | 50.65 | 269.09 |
+| +Rotated Task Alignment Learning | 77.24 | 50.65 | 269.09 |
+| +Decoupled Angle Prediction Head | 77.78 | 52.20 | 272.72 |
+| +Angle Prediction with DFL | 78.01 | 53.29 | 281.65 |
+| +Learnable Gating Unit for RepVGG | 78.14 | 53.29 | 281.65 |
+
+
+## 引用
+
+```
+@article{xu2022pp,
+ title={PP-YOLOE: An evolved version of YOLO},
+ author={Xu, Shangliang and Wang, Xinxin and Lv, Wenyu and Chang, Qinyao and Cui, Cheng and Deng, Kaipeng and Wang, Guanzhong and Dang, Qingqing and Wei, Shengyu and Du, Yuning and others},
+ journal={arXiv preprint arXiv:2203.16250},
+ year={2022}
+}
+
+@article{llerena2021gaussian,
+ title={Gaussian Bounding Boxes and Probabilistic Intersection-over-Union for Object Detection},
+ author={Llerena, Jeffri M and Zeni, Luis Felipe and Kristen, Lucas N and Jung, Claudio},
+ journal={arXiv preprint arXiv:2106.06072},
+ year={2021}
+}
+```
diff --git a/configs/rotate/ppyoloe_r/README_en.md b/configs/rotate/ppyoloe_r/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..b98cc736fde4b5aa7a392f5f8972f1d869be6c16
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/README_en.md
@@ -0,0 +1,145 @@
+English | [简体中文](README.md)
+
+# PP-YOLOE-R
+
+## Content
+- [Introduction](#Introduction)
+- [Model Zoo](#Model-Zoo)
+- [Getting Start](#Getting-Start)
+- [Deployment](#Deployment)
+- [Appendix](#Appendix)
+- [Citations](#Citations)
+
+## Introduction
+PP-YOLOE-R is an efficient anchor-free rotated object detector. Based on PP-YOLOE, PP-YOLOE-R introduces a bag of useful tricks to improve detection precision at the expense of marginal parameters and computations. PP-YOLOE-R-l and PP-YOLOE-R-x achieve 78.14 and 78.28 mAP respectively on DOTA 1.0 dataset with single-scale training and testing, which outperform almost all other rotated object detectors. With multi-scale training and testing, the detection precision of PP-YOLOE-R-l and PP-YOLOE-R-x is further improved to 80.02 and 80.73 mAP. In this case, PP-YOLOE-R-x surpasses all anchor-free methods and demonstrates competitive performance to state-of-the-art anchor-based two-stage models. Moreover, PP-YOLOE-R-s and PP-YOLOE-R-m can achieve 79.42 and 79.71 mAP with multi-scale training and testing, which is an excellent result considering the parameters and GFLOPs of these two models. While maintaining high precision, PP-YOLOE-R avoids using special operators, such as Deformable Convolution or Rotated RoI Align, to be deployment-friendly on various hardware. At the input resolution of 1024x1024, PP-YOLOE-R-s/m/l/x can reach 69.8/55.1/48.3/37.1 FPS on RTX 2080 Ti and 114.5/86.8/69.7/50.7 FPS on Tesla V100 GPU with TensorRT and FP16-precision. For more details, please refer to our technical report.
+
+
+
+
+
+Compared with PP-YOLOE, PP-YOLOE-R has made the following changes:
+- Rotated Task Alignment Learning
+- Decoupled Angle Prediction Head
+- Angle Prediction with DFL
+- Learnable Gating Unit for RepVGG
+- [ProbIoU Loss](https://arxiv.org/abs/2106.06072)
+
+## Model Zoo
+| Model | Backbone | mAP | V100 TRT FP16 (FPS) | RTX 2080 Ti TRT FP16 (FPS) | Lr Scheduler | Angle | Aug | GPU Number | images/GPU | download | config |
+|:---:|:--------:|:----:|:--------------------:|:------------:|:--------------------:|:-----:|:--------:|:-------:|:------:|:-----------:|:------:|
+| PP-YOLOE-R-s | CRN-s | 73.82 | 114.5 | 69.8 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml) |
+| PP-YOLOE-R-s | CRN-s | 79.42 | 114.5 | 69.8 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_s_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml) |
+| PP-YOLOE-R-m | CRN-m | 77.64 | 86.8 | 55.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml) |
+| PP-YOLOE-R-m | CRN-m | 79.71 | 86.8 | 55.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_m_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml) |
+| PP-YOLOE-R-l | CRN-l | 78.14 | 69.7 | 48.3 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml) |
+| PP-YOLOE-R-l | CRN-l | 80.02 | 69.7 | 48.3 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
+| PP-YOLOE-R-x | CRN-x | 78.28 | 50.7 | 37.1 | 3x | oc | RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml) |
+| PP-YOLOE-R-x | CRN-x | 80.73 | 50.7 | 37.1 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_x_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml) |
+
+**Notes:**
+
+- If **GPU number** or **mini-batch size** is changed, **learning rate** should be adjusted according to the formula **lrnew = lrdefault * (batch_sizenew * GPU_numbernew) / (batch_sizedefault * GPU_numberdefault)**.
+- Models in model zoo is trained and tested with single scale by default. If `MS` is indicated in the data augmentation column, it means that multi-scale training and multi-scale testing are used. If `RR` is indicated in the data augmentation column, it means that RandomRotate data augmentation is used for training.
+- CRN denotes CSPRepResNet proposed in PP-YOLOE
+- Speed is calculated and averaged by testing 2000 images on the DOTA test dataset. Refer to [Speed testing](#Speed-testing) to reproduce the results.
+
+## Getting Start
+
+Refer to [Data-Preparation](../README_en.md#Data-Preparation) to prepare data.
+
+### Training
+
+Single GPU Training
+``` bash
+CUDA_VISIBLE_DEVICES=0 python tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+Multiple GPUs Training
+``` bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
+```
+
+### Inference
+
+Run the follow command to infer single image, the result of inference will be saved in `output` directory by default.
+
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_img=demo/P0861__1.0__1154___824.png --draw_threshold=0.5
+```
+
+### Evaluation on DOTA Dataset
+Referring to [DOTA Task](https://captain-whu.github.io/DOTA/tasks.html), you need to submit a zip file containing results for all test images for evaluation. The detection results of each category are stored in a txt file, each line of which is in the following format
+`image_name score x1 y1 x2 y2 x3 y3 x4 y4`. To evaluate, you should submit the generated zip file to the Task1 of [DOTA Evaluation](https://captain-whu.github.io/DOTA/evaluation.html). You can run the following command to get the inference results of the test dataset:
+``` bash
+python tools/infer.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams --infer_dir=/path/to/test/images --output_dir=output_ppyoloe_r --visualize=False --save_results=True
+```
+Process the prediction results into the format required for the official website evaluation:
+``` bash
+python configs/rotate/tools/generate_result.py --pred_txt_dir=output_ppyoloe_r/ --output_dir=submit/ --data_type=dota10
+
+zip -r submit.zip submit
+```
+
+### Speed testing
+
+To test speed, make sure that **the version of TensorRT is larger than 8.2 and the version of PaddlePaddle is larger than 2.4.0rc**. Using Paddle Inference with TensorRT to test speed, run the following command
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# speed testing
+CUDA_VISIBLE_DEVICES=0 python configs/rotate/tools/inference_benchmark.py --model_dir output_inference/ppyoloe_r_crn_l_3x_dota/ --image_dir /path/to/dota/test/dir --run_mode trt_fp16
+```
+
+## Deployment
+
+**Using Paddle Inference without TensorRT** for deployment, run the following command
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams
+
+# inference single image
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=paddle --device=gpu
+```
+
+**Using Paddle Inference with TensorRT** for deployment, run the following command
+
+``` bash
+# export inference model
+python tools/export_model.py -c configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota.pdparams trt=True
+
+# inference single image
+python deploy/python/infer.py --image_file demo/P0072__1.0__0___0.png --model_dir=output_inference/ppyoloe_r_crn_l_3x_dota --run_mode=trt_fp16 --device=gpu
+```
+
+## Appendix
+
+Ablation experiments of PP-YOLOE-R
+
+| Model | mAP | Params(M) | FLOPs(G) |
+| :-: | :-: | :------: | :------: |
+| Baseline | 75.61 | 50.65 | 269.09 |
+| +Rotated Task Alignment Learning | 77.24 | 50.65 | 269.09 |
+| +Decoupled Angle Prediction Head | 77.78 | 52.20 | 272.72 |
+| +Angle Prediction with DFL | 78.01 | 53.29 | 281.65 |
+| +Learnable Gating Unit for RepVGG | 78.14 | 53.29 | 281.65 |
+
+## Citations
+
+```
+@article{xu2022pp,
+ title={PP-YOLOE: An evolved version of YOLO},
+ author={Xu, Shangliang and Wang, Xinxin and Lv, Wenyu and Chang, Qinyao and Cui, Cheng and Deng, Kaipeng and Wang, Guanzhong and Dang, Qingqing and Wei, Shengyu and Du, Yuning and others},
+ journal={arXiv preprint arXiv:2203.16250},
+ year={2022}
+}
+
+@article{llerena2021gaussian,
+ title={Gaussian Bounding Boxes and Probabilistic Intersection-over-Union for Object Detection},
+ author={Llerena, Jeffri M and Zeni, Luis Felipe and Kristen, Lucas N and Jung, Claudio},
+ journal={arXiv preprint arXiv:2106.06072},
+ year={2021}
+}
+```
diff --git a/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml b/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1cdad4beb093deeef0b6918b88b81fc5964e95ce
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/_base_/optimizer_3x.yml
@@ -0,0 +1,19 @@
+epoch: 36
+
+LearningRate:
+ base_lr: 0.008
+ schedulers:
+ - !CosineDecay
+ max_epochs: 44
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 35.
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ab5bdb50aa731e3af664b68aa52b3c7293d715e8
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_crn.yml
@@ -0,0 +1,49 @@
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: CSPResNet
+ neck: CustomCSPPAN
+ yolo_head: PPYOLOERHead
+ post_process: ~
+
+CSPResNet:
+ layers: [3, 6, 6, 3]
+ channels: [64, 128, 256, 512, 1024]
+ return_idx: [1, 2, 3]
+ use_large_stem: True
+ use_alpha: True
+
+CustomCSPPAN:
+ out_channels: [768, 384, 192]
+ stage_num: 1
+ block_num: 3
+ act: 'swish'
+ spp: true
+ use_alpha: True
+
+PPYOLOERHead:
+ fpn_strides: [32, 16, 8]
+ grid_cell_offset: 0.5
+ use_varifocal_loss: true
+ static_assigner_epoch: -1
+ loss_weight: {class: 1.0, iou: 2.5, dfl: 0.05}
+ static_assigner:
+ name: FCOSRAssigner
+ factor: 12
+ threshold: 0.23
+ boundary: [[512, 10000], [256, 512], [-1, 256]]
+ assigner:
+ name: RotatedTaskAlignedAssigner
+ topk: 13
+ alpha: 1.0
+ beta: 6.0
+ nms:
+ name: MultiClassNMS
+ nms_top_k: 2000
+ keep_top_k: -1
+ score_threshold: 0.1
+ nms_threshold: 0.1
+ normalized: False
diff --git a/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aa9de88b60d62a09b38bfbe2a4b7af55c43626b1
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/_base_/ppyoloe_r_reader.yml
@@ -0,0 +1,45 @@
+worker_num: 4
+image_height: &image_height 1024
+image_width: &image_width 1024
+image_size: &image_size [*image_height, *image_width]
+
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Poly2Array: {}
+ - RandomRFlip: {}
+ - RandomRRotate: {angle_mode: 'value', angle: [0, 90, 180, -90]}
+ - RandomRRotate: {angle_mode: 'value', angle: [30, 60], rotate_prob: 0.5}
+ - RResize: {target_size: *image_size, keep_ratio: True, interp: 2}
+ - Poly2RBox: {filter_threshold: 2, filter_mode: 'edge', rbox_type: 'oc'}
+ batch_transforms:
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - PadRGT: {}
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+ use_shared_memory: true
+ collate_batch: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Poly2Array: {}
+ - RResize: {target_size: *image_size, keep_ratio: True, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 2
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: *image_size, keep_ratio: True, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 8
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b019d736c19b35423cb536eea0cf0e55036c2af7
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_l_3x_dota/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams
+depth_mult: 1.0
+width_mult: 1.0
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a1411a3153dfae89d722d4895039b15370094c45
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota_ms.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_l_3x_dota_ms/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams
+depth_mult: 1.0
+width_mult: 1.0
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml
new file mode 100644
index 0000000000000000000000000000000000000000..755cf3f4e5bb93072779cf83344124c6d28cb925
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_m_3x_dota/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_m_pretrained.pdparams
+depth_mult: 0.67
+width_mult: 0.75
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d885b459ff61f5ab7b3dcdcf55b80f1d6a3d6a4f
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_m_3x_dota_ms.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota_ms.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_m_3x_dota_ms/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_m_pretrained.pdparams
+depth_mult: 0.67
+width_mult: 0.75
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a227f18ac2ddb93e7af79d2452ea7e043cfe3eb0
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_s_3x_dota/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_s_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.50
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..921a9d571b730d3f57865e51baca6d37080d42a1
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_dota_ms.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota_ms.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_s_3x_dota_ms/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_s_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.50
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d81b5ef9861fcef9e044c792894f671886037182
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_x_3x_dota/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_x_pretrained.pdparams
+depth_mult: 1.33
+width_mult: 1.25
diff --git a/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d99cdb0787109cdd88054d15967ddf4bfbb2b52f
--- /dev/null
+++ b/configs/rotate/ppyoloe_r/ppyoloe_r_crn_x_3x_dota_ms.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../../datasets/dota_ms.yml',
+ '../../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/ppyoloe_r_reader.yml',
+ '_base_/ppyoloe_r_crn.yml'
+]
+
+log_iter: 50
+snapshot_epoch: 1
+weights: output/ppyoloe_r_crn_x_3x_dota_ms/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_x_pretrained.pdparams
+depth_mult: 1.33
+width_mult: 1.25
diff --git a/configs/rotate/tools/inference_benchmark.py b/configs/rotate/tools/inference_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..dcce2d2feafacc2d00cae4b16252265d92776c91
--- /dev/null
+++ b/configs/rotate/tools/inference_benchmark.py
@@ -0,0 +1,356 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import six
+import glob
+import time
+import yaml
+import argparse
+import cv2
+import numpy as np
+
+import paddle
+import paddle.version as paddle_version
+from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version
+
+
+TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
+
+def check_version(version='2.2'):
+ err = "PaddlePaddle version {} or higher is required, " \
+ "or a suitable develop version is satisfied as well. \n" \
+ "Please make sure the version is good with your code.".format(version)
+
+ version_installed = [
+ paddle_version.major, paddle_version.minor, paddle_version.patch,
+ paddle_version.rc
+ ]
+
+ if version_installed == ['0', '0', '0', '0']:
+ return
+
+ version_split = version.split('.')
+
+ length = min(len(version_installed), len(version_split))
+ for i in six.moves.range(length):
+ if version_installed[i] > version_split[i]:
+ return
+ if version_installed[i] < version_split[i]:
+ raise Exception(err)
+
+
+def check_trt_version(version='8.2'):
+ err = "TensorRT version {} or higher is required," \
+ "Please make sure the version is good with your code.".format(version)
+ version_split = list(map(int, version.split('.')))
+ version_installed = get_trt_runtime_version()
+ length = min(len(version_installed), len(version_split))
+ for i in six.moves.range(length):
+ if version_installed[i] > version_split[i]:
+ return
+ if version_installed[i] < version_split[i]:
+ raise Exception(err)
+
+
+# preprocess ops
+def decode_image(im_file, im_info):
+ if isinstance(im_file, str):
+ with open(im_file, 'rb') as f:
+ im_read = f.read()
+ data = np.frombuffer(im_read, dtype='uint8')
+ im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+ else:
+ im = im_file
+ im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
+ im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
+ return im, im_info
+
+class Resize(object):
+
+ def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
+ if isinstance(target_size, int):
+ target_size = [target_size, target_size]
+ self.target_size = target_size
+ self.keep_ratio = keep_ratio
+ self.interp = interp
+
+ def __call__(self, im, im_info):
+ assert len(self.target_size) == 2
+ assert self.target_size[0] > 0 and self.target_size[1] > 0
+ im_channel = im.shape[2]
+ im_scale_y, im_scale_x = self.generate_scale(im)
+ im = cv2.resize(
+ im,
+ None,
+ None,
+ fx=im_scale_x,
+ fy=im_scale_y,
+ interpolation=self.interp)
+ im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
+ im_info['scale_factor'] = np.array(
+ [im_scale_y, im_scale_x]).astype('float32')
+ return im, im_info
+
+ def generate_scale(self, im):
+ origin_shape = im.shape[:2]
+ im_c = im.shape[2]
+ if self.keep_ratio:
+ im_size_min = np.min(origin_shape)
+ im_size_max = np.max(origin_shape)
+ target_size_min = np.min(self.target_size)
+ target_size_max = np.max(self.target_size)
+ im_scale = float(target_size_min) / float(im_size_min)
+ if np.round(im_scale * im_size_max) > target_size_max:
+ im_scale = float(target_size_max) / float(im_size_max)
+ im_scale_x = im_scale
+ im_scale_y = im_scale
+ else:
+ resize_h, resize_w = self.target_size
+ im_scale_y = resize_h / float(origin_shape[0])
+ im_scale_x = resize_w / float(origin_shape[1])
+ return im_scale_y, im_scale_x
+
+class Permute(object):
+
+ def __init__(self, ):
+ super(Permute, self).__init__()
+
+ def __call__(self, im, im_info):
+ im = im.transpose((2, 0, 1))
+ return im, im_info
+
+class NormalizeImage(object):
+ def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
+ self.mean = mean
+ self.std = std
+ self.is_scale = is_scale
+ self.norm_type = norm_type
+
+ def __call__(self, im, im_info):
+ im = im.astype(np.float32, copy=False)
+ if self.is_scale:
+ scale = 1.0 / 255.0
+ im *= scale
+
+ if self.norm_type == 'mean_std':
+ mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+ std = np.array(self.std)[np.newaxis, np.newaxis, :]
+ im -= mean
+ im /= std
+ return im, im_info
+
+
+class PadStride(object):
+
+ def __init__(self, stride=0):
+ self.coarsest_stride = stride
+
+ def __call__(self, im, im_info):
+ coarsest_stride = self.coarsest_stride
+ if coarsest_stride <= 0:
+ return im, im_info
+ im_c, im_h, im_w = im.shape
+ pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
+ pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
+ padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
+ padding_im[:, :im_h, :im_w] = im
+ return padding_im, im_info
+
+
+def preprocess(im, preprocess_ops):
+ # process image by preprocess_ops
+ im_info = {
+ 'scale_factor': np.array(
+ [1., 1.], dtype=np.float32),
+ 'im_shape': None,
+ }
+ im, im_info = decode_image(im, im_info)
+ for operator in preprocess_ops:
+ im, im_info = operator(im, im_info)
+ return im, im_info
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--model_dir', type=str, help='directory of inference model')
+ parser.add_argument('--run_mode', type=str, default='paddle', help='running mode')
+ parser.add_argument('--batch_size', type=int, default=1, help='batch size')
+ parser.add_argument('--image_dir', type=str, default='/paddle/data/DOTA_1024_ss/test1024/images', help='directory of test images')
+ parser.add_argument('--warmup_iter', type=int, default=5, help='num of warmup iters')
+ parser.add_argument('--total_iter', type=int, default=2000, help='num of total iters')
+ parser.add_argument('--log_iter', type=int, default=50, help='num of log interval')
+ parser.add_argument('--tuned_trt_shape_file', type=str, default='shape_range_info.pbtxt', help='dynamic shape range info')
+ args = parser.parse_args()
+ return args
+
+
+def init_predictor(FLAGS):
+ model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size
+ yaml_file = os.path.join(model_dir, 'infer_cfg.yml')
+ with open(yaml_file) as f:
+ yml_conf = yaml.safe_load(f)
+
+ config = Config(
+ os.path.join(model_dir, 'model.pdmodel'),
+ os.path.join(model_dir, 'model.pdiparams'))
+
+ # initial GPU memory(M), device ID
+ config.enable_use_gpu(200, 0)
+ # optimize graph and fuse op
+ config.switch_ir_optim(True)
+
+ precision_map = {
+ 'trt_int8': Config.Precision.Int8,
+ 'trt_fp32': Config.Precision.Float32,
+ 'trt_fp16': Config.Precision.Half
+ }
+
+ arch = yml_conf['arch']
+ tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)
+
+ if run_mode in precision_map.keys():
+ if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(tuned_trt_shape_file):
+ print('dynamic shape range info is saved in {}. After that, rerun the code'.format(tuned_trt_shape_file))
+ config.collect_shape_range_info(tuned_trt_shape_file)
+ config.enable_tensorrt_engine(
+ workspace_size=(1 << 25) * batch_size,
+ max_batch_size=batch_size,
+ min_subgraph_size=yml_conf['min_subgraph_size'],
+ precision_mode=precision_map[run_mode],
+ use_static=True,
+ use_calib_mode=False)
+
+ if yml_conf['use_dynamic_shape']:
+ if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(tuned_trt_shape_file):
+ config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file, True)
+ else:
+ min_input_shape = {
+ 'image': [batch_size, 3, 640, 640],
+ 'scale_factor': [batch_size, 2]
+ }
+ max_input_shape = {
+ 'image': [batch_size, 3, 1280, 1280],
+ 'scale_factor': [batch_size, 2]
+ }
+ opt_input_shape = {
+ 'image': [batch_size, 3, 1024, 1024],
+ 'scale_factor': [batch_size, 2]
+ }
+ config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+ opt_input_shape)
+
+ # disable print log when predict
+ config.disable_glog_info()
+ # enable shared memory
+ config.enable_memory_optim()
+ # disable feed, fetch OP, needed by zero_copy_run
+ config.switch_use_feed_fetch_ops(False)
+ predictor = create_predictor(config)
+ return predictor, yml_conf
+
+def create_preprocess_ops(yml_conf):
+ preprocess_ops = []
+ for op_info in yml_conf['Preprocess']:
+ new_op_info = op_info.copy()
+ op_type = new_op_info.pop('type')
+ preprocess_ops.append(eval(op_type)(**new_op_info))
+ return preprocess_ops
+
+
+def get_test_images(image_dir):
+ images = set()
+ infer_dir = os.path.abspath(image_dir)
+ exts = ['jpg', 'jpeg', 'png', 'bmp']
+ exts += [ext.upper() for ext in exts]
+ for ext in exts:
+ images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+ images = list(images)
+ return images
+
+
+def create_inputs(image_files, preprocess_ops):
+ inputs = dict()
+ im_list, im_info_list = [], []
+ for im_path in image_files:
+ im, im_info = preprocess(im_path, preprocess_ops)
+ im_list.append(im)
+ im_info_list.append(im_info)
+
+ inputs['im_shape'] = np.stack([e['im_shape'] for e in im_info_list], axis=0).astype('float32')
+ inputs['scale_factor'] = np.stack([e['scale_factor'] for e in im_info_list], axis=0).astype('float32')
+ inputs['image'] = np.stack(im_list, axis=0).astype('float32')
+ return inputs
+
+
+def measure_speed(FLAGS):
+ predictor, yml_conf = init_predictor(FLAGS)
+ input_names = predictor.get_input_names()
+ preprocess_ops = create_preprocess_ops(yml_conf)
+
+ image_files = get_test_images(FLAGS.image_dir)
+
+ batch_size = FLAGS.batch_size
+ warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter
+
+ total_time = 0
+ fps = 0
+ for i in range(0, total_iter, batch_size):
+ # make data ready
+ inputs = create_inputs(image_files[i:i + batch_size], preprocess_ops)
+ for name in input_names:
+ input_tensor = predictor.get_input_handle(name)
+ input_tensor.copy_from_cpu(inputs[name])
+
+ paddle.device.cuda.synchronize()
+ # start running
+ start_time = time.perf_counter()
+ predictor.run()
+ paddle.device.cuda.synchronize()
+
+ if i >= warmup_iter:
+ total_time += time.perf_counter() - start_time
+ if (i + 1) % log_iter == 0:
+ fps = (i + 1 - warmup_iter) / total_time
+ print(
+ f'Done image [{i + 1:<3}/ {total_iter}], '
+ f'fps: {fps:.1f} img / s, '
+ f'times per image: {1000 / fps:.1f} ms / img',
+ flush=True)
+
+ if (i + 1) == total_iter:
+ fps = (i + 1 - warmup_iter) / total_time
+ print(
+ f'Overall fps: {fps:.1f} img / s, '
+ f'times per image: {1000 / fps:.1f} ms / img',
+ flush=True)
+ break
+
+if __name__ == '__main__':
+ FLAGS = parse_args()
+ check_version('2.4')
+ check_trt_version('8.2')
+ measure_speed(FLAGS)
+
+
+
+
+
+
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md
index 50a835b50da1934876d5824a1b3d08ac32ed2490..7235b009ddb2d1264377a37d835d8cf5b9106b0f 100644
--- a/docs/MODEL_ZOO_cn.md
+++ b/docs/MODEL_ZOO_cn.md
@@ -110,9 +110,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
## 旋转框检测
-### S2ANet
-
-请参考[S2ANet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dota/)
+[旋转框检测模型库](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate)
## 关键点检测
diff --git a/docs/MODEL_ZOO_en.md b/docs/MODEL_ZOO_en.md
index 599121ac08e77b4b8c346ef5f179d414c2ebc304..c26e5c1db155a63857cab458f426123583218aea 100644
--- a/docs/MODEL_ZOO_en.md
+++ b/docs/MODEL_ZOO_en.md
@@ -107,12 +107,9 @@ Please refer to[YOLOv6](https://github.com/nemonameless/PaddleDetection_YOLOSeri
Please refer to[YOLOv7](https://github.com/nemonameless/PaddleDetection_YOLOSeries/tree/develop/configs/yolov7)
-## Rotating frame detection
-
-### S2ANet
-
-Please refer to[S2ANet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dota/)
+## Rotated Object detection
+[Model Zoo for Rotated Object Detection](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate)
## KeyPoint Detection
diff --git a/docs/images/ppyoloe_r_map_fps.png b/docs/images/ppyoloe_r_map_fps.png
new file mode 100644
index 0000000000000000000000000000000000000000..2d4553b97e96a63c428b08a2da9d0f8880e72be8
Binary files /dev/null and b/docs/images/ppyoloe_r_map_fps.png differ
diff --git a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc
similarity index 68%
rename from ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
rename to ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc
index 2c3c58b606c22607272d6d37877d11399d7542d9..b16e8c1f2ef93c322fe062af1735189d3eb98f47 100644
--- a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
+++ b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc
@@ -13,14 +13,14 @@
// limitations under the License.
//
// The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
+#include "../rbox_iou/rbox_iou_utils.h"
#include "paddle/extension.h"
-#include "rbox_iou_op.h"
template
void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
- const T *rbox2_data_ptr, T *output_data_ptr) {
+ const T *rbox2_data_ptr, T *output_data_ptr) {
int i;
for (i = 0; i < rbox_num; i++) {
@@ -30,42 +30,43 @@ void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
}
#define CHECK_INPUT_CPU(x) \
- PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+ PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
-std::vector MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
- const paddle::Tensor &rbox2) {
+std::vector
+MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
CHECK_INPUT_CPU(rbox1);
CHECK_INPUT_CPU(rbox2);
PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
auto rbox_num = rbox1.shape()[0];
- auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
+ auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::CPUPlace());
- PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rotated_iou_cpu_kernel", ([&] {
+ PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "matched_rbox_iou_cpu_kernel", ([&] {
matched_rbox_iou_cpu_kernel(
rbox_num, rbox1.data(),
- rbox2.data(),
- output.mutable_data());
+ rbox2.data(), output.data());
}));
return {output};
}
#ifdef PADDLE_WITH_CUDA
-std::vector MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
- const paddle::Tensor &rbox2);
+std::vector
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2);
#endif
#define CHECK_INPUT_SAME(x1, x2) \
PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
std::vector MatchedRboxIouForward(const paddle::Tensor &rbox1,
- const paddle::Tensor &rbox2) {
+ const paddle::Tensor &rbox2) {
CHECK_INPUT_SAME(rbox1, rbox2);
- if (rbox1.place() == paddle::PlaceType::kCPU) {
+ if (rbox1.is_cpu()) {
return MatchedRboxIouCPUForward(rbox1, rbox2);
#ifdef PADDLE_WITH_CUDA
- } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+ } else if (rbox1.is_gpu()) {
return MatchedRboxIouCUDAForward(rbox1, rbox2);
#endif
}
@@ -73,12 +74,12 @@ std::vector MatchedRboxIouForward(const paddle::Tensor &rbox1,
std::vector>
MatchedRboxIouInferShape(std::vector rbox1_shape,
- std::vector rbox2_shape) {
+ std::vector rbox2_shape) {
return {{rbox1_shape[0]}};
}
std::vector MatchedRboxIouInferDtype(paddle::DataType t1,
- paddle::DataType t2) {
+ paddle::DataType t2) {
return {t1};
}
diff --git a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
similarity index 72%
rename from ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
rename to ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
index 8d03ecce6a775162980746adf727738a6beb102b..53454d106392f208e72a5e1d1fd6e9bcf609927f 100644
--- a/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
+++ b/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
@@ -13,21 +13,15 @@
// limitations under the License.
//
// The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
+#include "../rbox_iou/rbox_iou_utils.h"
#include "paddle/extension.h"
-#include "rbox_iou_op.h"
-
-/**
- Computes ceil(a / b)
-*/
-
-static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
template
__global__ void
matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
- const T *rbox2_data_ptr, T *output_data_ptr) {
+ const T *rbox2_data_ptr, T *output_data_ptr) {
for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num;
tid += blockDim.x * gridDim.x) {
output_data_ptr[tid] =
@@ -36,17 +30,18 @@ matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
}
#define CHECK_INPUT_GPU(x) \
- PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+ PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
-std::vector MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
- const paddle::Tensor &rbox2) {
+std::vector
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
CHECK_INPUT_GPU(rbox1);
CHECK_INPUT_GPU(rbox2);
PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
auto rbox_num = rbox1.shape()[0];
- auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
+ auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::GPUPlace());
const int thread_per_block = 512;
const int block_per_grid = CeilDiv(rbox_num, thread_per_block);
@@ -56,7 +51,7 @@ std::vector MatchedRboxIouCUDAForward(const paddle::Tensor &rbox
matched_rbox_iou_cuda_kernel<
data_t><<>>(
rbox_num, rbox1.data(), rbox2.data(),
- output.mutable_data());
+ output.data());
}));
return {output};
diff --git a/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44f4eb62b851736176f7fade903248e6c95c6d83
--- /dev/null
+++ b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc
@@ -0,0 +1,121 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "../rbox_iou/rbox_iou_utils.h"
+#include "paddle/extension.h"
+
+template
+void nms_rotated_cpu_kernel(const T *boxes_data, const float threshold,
+ const int64_t num_boxes, int64_t *num_keep_boxes,
+ int64_t *output_data) {
+
+ int num_masks = CeilDiv(num_boxes, 64);
+ std::vector masks(num_masks, 0);
+ for (int64_t i = 0; i < num_boxes; ++i) {
+ if (masks[i / 64] & 1ULL << (i % 64))
+ continue;
+ T box_1[5];
+ for (int k = 0; k < 5; ++k) {
+ box_1[k] = boxes_data[i * 5 + k];
+ }
+ for (int64_t j = i + 1; j < num_boxes; ++j) {
+ if (masks[j / 64] & 1ULL << (j % 64))
+ continue;
+ T box_2[5];
+ for (int k = 0; k < 5; ++k) {
+ box_2[k] = boxes_data[j * 5 + k];
+ }
+ if (rbox_iou_single(box_1, box_2) > threshold) {
+ masks[j / 64] |= 1ULL << (j % 64);
+ }
+ }
+ }
+ int64_t output_data_idx = 0;
+ for (int64_t i = 0; i < num_boxes; ++i) {
+ if (masks[i / 64] & 1ULL << (i % 64))
+ continue;
+ output_data[output_data_idx++] = i;
+ }
+ *num_keep_boxes = output_data_idx;
+ for (; output_data_idx < num_boxes; ++output_data_idx) {
+ output_data[output_data_idx] = 0;
+ }
+}
+
+#define CHECK_INPUT_CPU(x) \
+ PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
+
+std::vector NMSRotatedCPUForward(const paddle::Tensor &boxes,
+ const paddle::Tensor &scores,
+ float threshold) {
+ CHECK_INPUT_CPU(boxes);
+ CHECK_INPUT_CPU(scores);
+
+ auto num_boxes = boxes.shape()[0];
+
+ auto order_t =
+ std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true));
+ auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0);
+
+ auto keep =
+ paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace());
+ int64_t num_keep_boxes = 0;
+
+ PD_DISPATCH_FLOATING_TYPES(boxes.type(), "nms_rotated_cpu_kernel", ([&] {
+ nms_rotated_cpu_kernel(
+ boxes_sorted.data(), threshold,
+ num_boxes, &num_keep_boxes,
+ keep.data());
+ }));
+
+ keep = keep.slice(0, num_keep_boxes);
+ return {paddle::gather(order_t, keep, /* axis=*/0)};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector NMSRotatedCUDAForward(const paddle::Tensor &boxes,
+ const paddle::Tensor &scores,
+ float threshold);
+#endif
+
+std::vector NMSRotatedForward(const paddle::Tensor &boxes,
+ const paddle::Tensor &scores,
+ float threshold) {
+ if (boxes.is_cpu()) {
+ return NMSRotatedCPUForward(boxes, scores, threshold);
+#ifdef PADDLE_WITH_CUDA
+ } else if (boxes.is_gpu()) {
+ return NMSRotatedCUDAForward(boxes, scores, threshold);
+#endif
+ }
+}
+
+std::vector>
+NMSRotatedInferShape(std::vector boxes_shape,
+ std::vector scores_shape) {
+ return {{-1}};
+}
+
+std::vector NMSRotatedInferDtype(paddle::DataType t1,
+ paddle::DataType t2) {
+ return {paddle::DataType::INT64};
+}
+
+PD_BUILD_OP(nms_rotated)
+ .Inputs({"Boxes", "Scores"})
+ .Outputs({"Output"})
+ .Attrs({"threshold: float"})
+ .SetKernelFn(PD_KERNEL(NMSRotatedForward))
+ .SetInferShapeFn(PD_INFER_SHAPE(NMSRotatedInferShape))
+ .SetInferDtypeFn(PD_INFER_DTYPE(NMSRotatedInferDtype));
\ No newline at end of file
diff --git a/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d20dddb5739619de9fc616c1e0d59941952e73c5
--- /dev/null
+++ b/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "../rbox_iou/rbox_iou_utils.h"
+#include "paddle/extension.h"
+
+static const int64_t threadsPerBlock = sizeof(int64_t) * 8;
+
+template
+__global__ void
+nms_rotated_cuda_kernel(const T *boxes_data, const float threshold,
+ const int64_t num_boxes, int64_t *masks) {
+ auto raw_start = blockIdx.y;
+ auto col_start = blockIdx.x;
+ if (raw_start > col_start)
+ return;
+ const int raw_last_storage =
+ min(num_boxes - raw_start * threadsPerBlock, threadsPerBlock);
+ const int col_last_storage =
+ min(num_boxes - col_start * threadsPerBlock, threadsPerBlock);
+ if (threadIdx.x < raw_last_storage) {
+ int64_t mask = 0;
+ auto current_box_idx = raw_start * threadsPerBlock + threadIdx.x;
+ const T *current_box = boxes_data + current_box_idx * 5;
+ for (int i = 0; i < col_last_storage; ++i) {
+ const T *target_box = boxes_data + (col_start * threadsPerBlock + i) * 5;
+ if (rbox_iou_single(current_box, target_box) > threshold) {
+ mask |= 1ULL << i;
+ }
+ }
+ const int blocks_per_line = CeilDiv(num_boxes, threadsPerBlock);
+ masks[current_box_idx * blocks_per_line + col_start] = mask;
+ }
+}
+
+#define CHECK_INPUT_GPU(x) \
+ PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
+
+std::vector NMSRotatedCUDAForward(const paddle::Tensor &boxes,
+ const paddle::Tensor &scores,
+ float threshold) {
+ CHECK_INPUT_GPU(boxes);
+ CHECK_INPUT_GPU(scores);
+
+ auto num_boxes = boxes.shape()[0];
+ auto order_t =
+ std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true));
+ auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0);
+
+ const auto blocks_per_line = CeilDiv(num_boxes, threadsPerBlock);
+ dim3 block(threadsPerBlock);
+ dim3 grid(blocks_per_line, blocks_per_line);
+ auto mask_dev = paddle::empty({num_boxes * blocks_per_line},
+ paddle::DataType::INT64, paddle::GPUPlace());
+
+ PD_DISPATCH_FLOATING_TYPES(
+ boxes.type(), "nms_rotated_cuda_kernel", ([&] {
+ nms_rotated_cuda_kernel<<>>(
+ boxes_sorted.data(), threshold, num_boxes,
+ mask_dev.data());
+ }));
+
+ auto mask_host = mask_dev.copy_to(paddle::CPUPlace(), true);
+ auto keep_host =
+ paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace());
+ int64_t *keep_host_ptr = keep_host.data();
+ int64_t *mask_host_ptr = mask_host.data();
+ std::vector remv(blocks_per_line);
+ int64_t last_box_num = 0;
+ for (int64_t i = 0; i < num_boxes; ++i) {
+ auto remv_element_id = i / threadsPerBlock;
+ auto remv_bit_id = i % threadsPerBlock;
+ if (!(remv[remv_element_id] & 1ULL << remv_bit_id)) {
+ keep_host_ptr[last_box_num++] = i;
+ int64_t *current_mask = mask_host_ptr + i * blocks_per_line;
+ for (auto j = remv_element_id; j < blocks_per_line; ++j) {
+ remv[j] |= current_mask[j];
+ }
+ }
+ }
+
+ keep_host = keep_host.slice(0, last_box_num);
+ auto keep_dev = keep_host.copy_to(paddle::GPUPlace(), true);
+ return {paddle::gather(order_t, keep_dev, /* axis=*/0)};
+}
\ No newline at end of file
diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c8e7528d35857eb39b8be441558876a4130a7ce6
--- /dev/null
+++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
+
+#include "paddle/extension.h"
+#include "rbox_iou_utils.h"
+
+template
+void rbox_iou_cpu_kernel(const int rbox1_num, const int rbox2_num,
+ const T *rbox1_data_ptr, const T *rbox2_data_ptr,
+ T *output_data_ptr) {
+
+ int i, j;
+ for (i = 0; i < rbox1_num; i++) {
+ for (j = 0; j < rbox2_num; j++) {
+ int offset = i * rbox2_num + j;
+ output_data_ptr[offset] =
+ rbox_iou_single(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
+ }
+ }
+}
+
+#define CHECK_INPUT_CPU(x) \
+ PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
+
+std::vector RboxIouCPUForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_CPU(rbox1);
+ CHECK_INPUT_CPU(rbox2);
+
+ auto rbox1_num = rbox1.shape()[0];
+ auto rbox2_num = rbox2.shape()[0];
+
+ auto output =
+ paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::CPUPlace());
+
+ PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rbox_iou_cpu_kernel", ([&] {
+ rbox_iou_cpu_kernel(
+ rbox1_num, rbox2_num, rbox1.data(),
+ rbox2.data(), output.data());
+ }));
+
+ return {output};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector RboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2);
+#endif
+
+#define CHECK_INPUT_SAME(x1, x2) \
+ PD_CHECK(x1.place() == x2.place(), "inputs must be in the same place.")
+
+std::vector RboxIouForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_SAME(rbox1, rbox2);
+ if (rbox1.is_cpu()) {
+ return RboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+ } else if (rbox1.is_gpu()) {
+ return RboxIouCUDAForward(rbox1, rbox2);
+#endif
+ }
+}
+
+std::vector>
+RboxIouInferShape(std::vector rbox1_shape,
+ std::vector rbox2_shape) {
+ return {{rbox1_shape[0], rbox2_shape[0]}};
+}
+
+std::vector RboxIouInferDtype(paddle::DataType t1,
+ paddle::DataType t2) {
+ return {t1};
+}
+
+PD_BUILD_OP(rbox_iou)
+ .Inputs({"RBox1", "RBox2"})
+ .Outputs({"Output"})
+ .SetKernelFn(PD_KERNEL(RboxIouForward))
+ .SetInferShapeFn(PD_INFER_SHAPE(RboxIouInferShape))
+ .SetInferDtypeFn(PD_INFER_DTYPE(RboxIouInferDtype));
diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu
similarity index 90%
rename from ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
rename to ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu
index 16d1d36f1002832d01db826743ce5c57ac557463..baedb6dedba6edbf207f4c68e84ab0b9b03b28ac 100644
--- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
+++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu
@@ -13,21 +13,15 @@
// limitations under the License.
//
// The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
#include "paddle/extension.h"
-#include "rbox_iou_op.h"
+#include "rbox_iou_utils.h"
// 2D block with 32 * 16 = 512 threads per block
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 16;
-/**
- Computes ceil(a / b)
-*/
-
-static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
-
template
__global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
const T *rbox1_data_ptr,
@@ -85,7 +79,7 @@ __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
}
#define CHECK_INPUT_GPU(x) \
- PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+ PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
std::vector RboxIouCUDAForward(const paddle::Tensor &rbox1,
const paddle::Tensor &rbox2) {
@@ -95,7 +89,8 @@ std::vector RboxIouCUDAForward(const paddle::Tensor &rbox1,
auto rbox1_num = rbox1.shape()[0];
auto rbox2_num = rbox2.shape()[0];
- auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num});
+ auto output =
+ paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::GPUPlace());
const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X);
const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y);
@@ -107,7 +102,7 @@ std::vector RboxIouCUDAForward(const paddle::Tensor &rbox1,
rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
rbox_iou_cuda_kernel<<>>(
rbox1_num, rbox2_num, rbox1.data(), rbox2.data(),
- output.mutable_data());
+ output.data());
}));
return {output};
diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
deleted file mode 100644
index 6031953d20e6302759621ac80b7a3e6ca35928db..0000000000000000000000000000000000000000
--- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
-
-#include "rbox_iou_op.h"
-#include "paddle/extension.h"
-
-
-template
-void rbox_iou_cpu_kernel(
- const int rbox1_num,
- const int rbox2_num,
- const T* rbox1_data_ptr,
- const T* rbox2_data_ptr,
- T* output_data_ptr) {
-
- int i, j;
- for (i = 0; i < rbox1_num; i++) {
- for (j = 0; j < rbox2_num; j++) {
- int offset = i * rbox2_num + j;
- output_data_ptr[offset] = rbox_iou_single(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
- }
- }
-}
-
-
-#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
-
-std::vector RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
- CHECK_INPUT_CPU(rbox1);
- CHECK_INPUT_CPU(rbox2);
-
- auto rbox1_num = rbox1.shape()[0];
- auto rbox2_num = rbox2.shape()[0];
-
- auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num});
-
- PD_DISPATCH_FLOATING_TYPES(
- rbox1.type(),
- "rbox_iou_cpu_kernel",
- ([&] {
- rbox_iou_cpu_kernel(
- rbox1_num,
- rbox2_num,
- rbox1.data(),
- rbox2.data(),
- output.mutable_data());
- }));
-
- return {output};
-}
-
-
-#ifdef PADDLE_WITH_CUDA
-std::vector RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
-#endif
-
-
-#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
-
-std::vector RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
- CHECK_INPUT_SAME(rbox1, rbox2);
- if (rbox1.place() == paddle::PlaceType::kCPU) {
- return RboxIouCPUForward(rbox1, rbox2);
-#ifdef PADDLE_WITH_CUDA
- } else if (rbox1.place() == paddle::PlaceType::kGPU) {
- return RboxIouCUDAForward(rbox1, rbox2);
-#endif
- }
-}
-
-std::vector> InferShape(std::vector rbox1_shape, std::vector rbox2_shape) {
- return {{rbox1_shape[0], rbox2_shape[0]}};
-}
-
-std::vector InferDtype(paddle::DataType t1, paddle::DataType t2) {
- return {t1};
-}
-
-PD_BUILD_OP(rbox_iou)
- .Inputs({"RBOX1", "RBOX2"})
- .Outputs({"Output"})
- .SetKernelFn(PD_KERNEL(RboxIouForward))
- .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
- .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
diff --git a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h
similarity index 97%
rename from ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
rename to ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h
index fce66dea00e829215ffdb3a38f8db6182a068609..6f275dd65a7d83962affc92be35fece8348a6a91 100644
--- a/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
+++ b/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h
@@ -13,7 +13,7 @@
// limitations under the License.
//
// The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
#pragma once
@@ -336,13 +336,21 @@ HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw,
box2.h = box2_raw[3];
box2.a = box2_raw[4];
- const T area1 = box1.w * box1.h;
- const T area2 = box2.w * box2.h;
- if (area1 < 1e-14 || area2 < 1e-14) {
+ if (box1.w < 1e-2 || box1.h < 1e-2 || box2.w < 1e-2 || box2.h < 1e-2) {
return 0.f;
}
+ const T area1 = box1.w * box1.h;
+ const T area2 = box2.w * box2.h;
const T intersection = rboxes_intersection(box1, box2);
const T iou = intersection / (area1 + area2 - intersection);
return iou;
}
+
+/**
+ Computes ceil(a / b)
+*/
+
+HOST_DEVICE inline int CeilDiv(const int a, const int b) {
+ return (a + b - 1) / b;
+}
\ No newline at end of file
diff --git a/ppdet/modeling/assigners/__init__.py b/ppdet/modeling/assigners/__init__.py
index ded98c9439cd896c99ca47bc3119d39effad3870..b344f0417e7efb02fefd16bdd3640e36e91ae93e 100644
--- a/ppdet/modeling/assigners/__init__.py
+++ b/ppdet/modeling/assigners/__init__.py
@@ -18,6 +18,7 @@ from . import atss_assigner
from . import simota_assigner
from . import max_iou_assigner
from . import fcosr_assigner
+from . import rotated_task_aligned_assigner
from .utils import *
from .task_aligned_assigner import *
@@ -25,3 +26,4 @@ from .atss_assigner import *
from .simota_assigner import *
from .max_iou_assigner import *
from .fcosr_assigner import *
+from .rotated_task_aligned_assigner import *
diff --git a/ppdet/modeling/assigners/fcosr_assigner.py b/ppdet/modeling/assigners/fcosr_assigner.py
index 84f991023215b344e59c9f6e1e4f7643b3c00dc0..46b743e601ab592cb275a554d4adb4c5a0e05bba 100644
--- a/ppdet/modeling/assigners/fcosr_assigner.py
+++ b/ppdet/modeling/assigners/fcosr_assigner.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/ppdet/modeling/assigners/rotated_task_aligned_assigner.py b/ppdet/modeling/assigners/rotated_task_aligned_assigner.py
new file mode 100644
index 0000000000000000000000000000000000000000..eeb9a68b6705fd2cb1c2b51b7d1496a943c1cd79
--- /dev/null
+++ b/ppdet/modeling/assigners/rotated_task_aligned_assigner.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ppdet.core.workspace import register
+from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes
+from .utils import gather_topk_anchors, compute_max_iou_anchor
+
+__all__ = ['RotatedTaskAlignedAssigner']
+
+
+@register
+class RotatedTaskAlignedAssigner(nn.Layer):
+ """TOOD: Task-aligned One-stage Object Detection
+ """
+
+ def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
+ super(RotatedTaskAlignedAssigner, self).__init__()
+ self.topk = topk
+ self.alpha = alpha
+ self.beta = beta
+ self.eps = eps
+
+ @paddle.no_grad()
+ def forward(self,
+ pred_scores,
+ pred_bboxes,
+ anchor_points,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index,
+ gt_scores=None):
+ r"""This code is based on
+ https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
+
+ The assignment is done in following steps
+ 1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
+ 2. select top-k bbox as candidates for each gt
+ 3. limit the positive sample's center in gt (because the anchor-free detector
+ only can predict positive distance)
+ 4. if an anchor box is assigned to multiple gts, the one with the
+ highest iou will be selected.
+ Args:
+ pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
+ pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5)
+ anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format
+ num_anchors_list (List): num of anchors in each level, shape(L)
+ gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+ gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
+ pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
+ bg_index (int): background index
+ gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
+ Returns:
+ assigned_labels (Tensor): (B, L)
+ assigned_bboxes (Tensor): (B, L, 5)
+ assigned_scores (Tensor): (B, L, C)
+ """
+ assert pred_scores.ndim == pred_bboxes.ndim
+ assert gt_labels.ndim == gt_bboxes.ndim and \
+ gt_bboxes.ndim == 3
+
+ batch_size, num_anchors, num_classes = pred_scores.shape
+ _, num_max_boxes, _ = gt_bboxes.shape
+
+ # negative batch
+ if num_max_boxes == 0:
+ assigned_labels = paddle.full(
+ [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
+ assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5])
+ assigned_scores = paddle.zeros(
+ [batch_size, num_anchors, num_classes])
+ return assigned_labels, assigned_bboxes, assigned_scores
+
+ # compute iou between gt and pred bbox, [B, n, L]
+ ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
+ ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious)
+ ious.stop_gradient = True
+ # gather pred bboxes class score
+ pred_scores = pred_scores.transpose([0, 2, 1])
+ batch_ind = paddle.arange(
+ end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
+ gt_labels_ind = paddle.stack(
+ [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
+ axis=-1)
+ bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
+ # compute alignment metrics, [B, n, L]
+ alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
+ self.beta)
+
+ # check the positive sample's center in gt, [B, n, L]
+ is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes)
+
+ # select topk largest alignment metrics pred bbox as candidates
+ # for each gt, [B, n, L]
+ is_in_topk = gather_topk_anchors(
+ alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
+
+ # select positive sample, [B, n, L]
+ mask_positive = is_in_topk * is_in_gts * pad_gt_mask
+
+ # if an anchor box is assigned to multiple gts,
+ # the one with the highest iou will be selected, [B, n, L]
+ mask_positive_sum = mask_positive.sum(axis=-2)
+ if mask_positive_sum.max() > 1:
+ mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
+ [1, num_max_boxes, 1])
+ is_max_iou = compute_max_iou_anchor(ious)
+ mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
+ mask_positive)
+ mask_positive_sum = mask_positive.sum(axis=-2)
+ assigned_gt_index = mask_positive.argmax(axis=-2)
+
+ # assigned target
+ assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
+ assigned_labels = paddle.gather(
+ gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
+ assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
+ assigned_labels = paddle.where(
+ mask_positive_sum > 0, assigned_labels,
+ paddle.full_like(assigned_labels, bg_index))
+
+ assigned_bboxes = paddle.gather(
+ gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
+ assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5])
+
+ assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
+ ind = list(range(num_classes + 1))
+ ind.remove(bg_index)
+ assigned_scores = paddle.index_select(
+ assigned_scores, paddle.to_tensor(ind), axis=-1)
+ # rescale alignment metrics
+ alignment_metrics *= mask_positive
+ max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
+ max_ious_per_instance = (ious * mask_positive).max(axis=-1,
+ keepdim=True)
+ alignment_metrics = alignment_metrics / (
+ max_metrics_per_instance + self.eps) * max_ious_per_instance
+ alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
+ assigned_scores = assigned_scores * alignment_metrics
+
+ assigned_bboxes.stop_gradient = True
+ assigned_scores.stop_gradient = True
+ assigned_labels.stop_gradient = True
+ return assigned_labels, assigned_bboxes, assigned_scores
diff --git a/ppdet/modeling/heads/__init__.py b/ppdet/modeling/heads/__init__.py
index 85c6b47bf9f22a3e16458f6bb2969d7bfd111354..1e7a6b97cb500114fea12ff6c2d9d4cdacfb4bdf 100644
--- a/ppdet/modeling/heads/__init__.py
+++ b/ppdet/modeling/heads/__init__.py
@@ -34,6 +34,7 @@ from . import tood_head
from . import retina_head
from . import ppyoloe_head
from . import fcosr_head
+from . import ppyoloe_r_head
from . import ld_gfl_head
from .bbox_head import *
@@ -59,3 +60,4 @@ from .retina_head import *
from .ppyoloe_head import *
from .fcosr_head import *
from .ld_gfl_head import *
+from .ppyoloe_r_head import *
diff --git a/ppdet/modeling/heads/fcosr_head.py b/ppdet/modeling/heads/fcosr_head.py
index 06b84440e8eb1f8e252eaf2c723bbc03bb4ced0a..97cd949d70bf67f9344b10d680dc9fd649960912 100644
--- a/ppdet/modeling/heads/fcosr_head.py
+++ b/ppdet/modeling/heads/fcosr_head.py
@@ -205,8 +205,8 @@ class FCOSRHead(nn.Layer):
anchor_points = []
stride_tensor = []
num_anchors_list = []
- for i, stride in enumerate(self.fpn_strides):
- _, _, h, w = feats[i].shape
+ for feat, stride in zip(feats, self.fpn_strides):
+ _, _, h, w = paddle.shape(feat)
shift_x = (paddle.arange(end=w) + 0.5) * stride
shift_y = (paddle.arange(end=h) + 0.5) * stride
shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
diff --git a/ppdet/modeling/heads/ppyoloe_r_head.py b/ppdet/modeling/heads/ppyoloe_r_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..89cb0fa82a07085154665e45680460551e46fb8b
--- /dev/null
+++ b/ppdet/modeling/heads/ppyoloe_r_head.py
@@ -0,0 +1,419 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from ppdet.core.workspace import register
+
+from ..losses import ProbIoULoss
+from ..initializer import bias_init_with_prob, constant_, normal_, vector_
+from ppdet.modeling.backbones.cspresnet import ConvBNLayer
+from ppdet.modeling.ops import get_static_shape, get_act_fn, anchor_generator
+from ppdet.modeling.layers import MultiClassNMS
+
+__all__ = ['PPYOLOERHead']
+
+
+class ESEAttn(nn.Layer):
+ def __init__(self, feat_channels, act='swish'):
+ super(ESEAttn, self).__init__()
+ self.fc = nn.Conv2D(feat_channels, feat_channels, 1)
+ self.conv = ConvBNLayer(feat_channels, feat_channels, 1, act=act)
+
+ self._init_weights()
+
+ def _init_weights(self):
+ normal_(self.fc.weight, std=0.01)
+
+ def forward(self, feat, avg_feat):
+ weight = F.sigmoid(self.fc(avg_feat))
+ return self.conv(feat * weight)
+
+
+@register
+class PPYOLOERHead(nn.Layer):
+ __shared__ = ['num_classes', 'trt']
+ __inject__ = ['static_assigner', 'assigner', 'nms']
+
+ def __init__(self,
+ in_channels=[1024, 512, 256],
+ num_classes=15,
+ act='swish',
+ fpn_strides=(32, 16, 8),
+ grid_cell_offset=0.5,
+ angle_max=90,
+ use_varifocal_loss=True,
+ static_assigner_epoch=4,
+ trt=False,
+ static_assigner='ATSSAssigner',
+ assigner='TaskAlignedAssigner',
+ nms='MultiClassNMS',
+ loss_weight={'class': 1.0,
+ 'iou': 2.5,
+ 'dfl': 0.05}):
+ super(PPYOLOERHead, self).__init__()
+ assert len(in_channels) > 0, "len(in_channels) should > 0"
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.fpn_strides = fpn_strides
+ self.grid_cell_offset = grid_cell_offset
+ self.angle_max = angle_max
+ self.loss_weight = loss_weight
+ self.use_varifocal_loss = use_varifocal_loss
+ self.half_pi = paddle.to_tensor(
+ [1.5707963267948966], dtype=paddle.float32)
+ self.half_pi_bin = self.half_pi / angle_max
+ self.iou_loss = ProbIoULoss()
+ self.static_assigner_epoch = static_assigner_epoch
+ self.static_assigner = static_assigner
+ self.assigner = assigner
+ self.nms = nms
+ # stem
+ self.stem_cls = nn.LayerList()
+ self.stem_reg = nn.LayerList()
+ self.stem_angle = nn.LayerList()
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+ self.trt = trt
+ for in_c in self.in_channels:
+ self.stem_cls.append(ESEAttn(in_c, act=act))
+ self.stem_reg.append(ESEAttn(in_c, act=act))
+ self.stem_angle.append(ESEAttn(in_c, act=act))
+ # pred head
+ self.pred_cls = nn.LayerList()
+ self.pred_reg = nn.LayerList()
+ self.pred_angle = nn.LayerList()
+ for in_c in self.in_channels:
+ self.pred_cls.append(
+ nn.Conv2D(
+ in_c, self.num_classes, 3, padding=1))
+ self.pred_reg.append(nn.Conv2D(in_c, 4, 3, padding=1))
+ self.pred_angle.append(
+ nn.Conv2D(
+ in_c, self.angle_max + 1, 3, padding=1))
+ self.angle_proj_conv = nn.Conv2D(
+ self.angle_max + 1, 1, 1, bias_attr=False)
+ self._init_weights()
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
+
+ def _init_weights(self):
+ bias_cls = bias_init_with_prob(0.01)
+ bias_angle = [10.] + [1.] * self.angle_max
+ for cls_, reg_, angle_ in zip(self.pred_cls, self.pred_reg,
+ self.pred_angle):
+ normal_(cls_.weight, std=0.01)
+ constant_(cls_.bias, bias_cls)
+ normal_(reg_.weight, std=0.01)
+ constant_(reg_.bias)
+ constant_(angle_.weight)
+ vector_(angle_.bias, bias_angle)
+
+ angle_proj = paddle.linspace(0, self.angle_max, self.angle_max + 1)
+ self.angle_proj = angle_proj * self.half_pi_bin
+ self.angle_proj_conv.weight.set_value(
+ self.angle_proj.reshape([1, self.angle_max + 1, 1, 1]))
+ self.angle_proj_conv.weight.stop_gradient = True
+
+ def _generate_anchors(self, feats):
+ if self.trt:
+ anchor_points = []
+ for feat, stride in zip(feats, self.fpn_strides):
+ _, _, h, w = paddle.shape(feat)
+ anchor, _ = anchor_generator(
+ feat,
+ stride * 4,
+ 1.0, [1.0, 1.0, 1.0, 1.0], [stride, stride],
+ offset=0.5)
+ x1, y1, x2, y2 = paddle.split(anchor, 4, axis=-1)
+ xc = (x1 + x2 + 1) / 2
+ yc = (y1 + y2 + 1) / 2
+ anchor_point = paddle.concat(
+ [xc, yc], axis=-1).reshape((1, h * w, 2))
+ anchor_points.append(anchor_point)
+ anchor_points = paddle.concat(anchor_points, axis=1)
+ return anchor_points, None, None
+ else:
+ anchor_points = []
+ stride_tensor = []
+ num_anchors_list = []
+ for feat, stride in zip(feats, self.fpn_strides):
+ _, _, h, w = paddle.shape(feat)
+ shift_x = (paddle.arange(end=w) + 0.5) * stride
+ shift_y = (paddle.arange(end=h) + 0.5) * stride
+ shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
+ anchor_point = paddle.cast(
+ paddle.stack(
+ [shift_x, shift_y], axis=-1), dtype='float32')
+ anchor_points.append(anchor_point.reshape([1, -1, 2]))
+ stride_tensor.append(
+ paddle.full(
+ [1, h * w, 1], stride, dtype='float32'))
+ num_anchors_list.append(h * w)
+ anchor_points = paddle.concat(anchor_points, axis=1)
+ stride_tensor = paddle.concat(stride_tensor, axis=1)
+ return anchor_points, stride_tensor, num_anchors_list
+
+ def forward(self, feats, targets=None):
+ assert len(feats) == len(self.fpn_strides), \
+ "The size of feats is not equal to size of fpn_strides"
+
+ if self.training:
+ return self.forward_train(feats, targets)
+ else:
+ return self.forward_eval(feats)
+
+ def forward_train(self, feats, targets):
+ anchor_points, stride_tensor, num_anchors_list = self._generate_anchors(
+ feats)
+
+ cls_score_list, reg_dist_list, reg_angle_list = [], [], []
+ for i, feat in enumerate(feats):
+ avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
+ cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) +
+ feat)
+ reg_dist = self.pred_reg[i](self.stem_reg[i](feat, avg_feat))
+ reg_angle = self.pred_angle[i](self.stem_angle[i](feat, avg_feat))
+ # cls and reg
+ cls_score = F.sigmoid(cls_logit)
+ cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1]))
+ reg_dist_list.append(reg_dist.flatten(2).transpose([0, 2, 1]))
+ reg_angle_list.append(reg_angle.flatten(2).transpose([0, 2, 1]))
+ cls_score_list = paddle.concat(cls_score_list, axis=1)
+ reg_dist_list = paddle.concat(reg_dist_list, axis=1)
+ reg_angle_list = paddle.concat(reg_angle_list, axis=1)
+
+ return self.get_loss([
+ cls_score_list, reg_dist_list, reg_angle_list, anchor_points,
+ num_anchors_list, stride_tensor
+ ], targets)
+
+ def forward_eval(self, feats):
+ cls_score_list, reg_box_list = [], []
+ anchor_points, _, _ = self._generate_anchors(feats)
+ for i, (feat, stride) in enumerate(zip(feats, self.fpn_strides)):
+ b, _, h, w = paddle.shape(feat)
+ l = h * w
+ # cls
+ avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
+ cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) +
+ feat)
+ # reg
+ reg_dist = self.pred_reg[i](self.stem_reg[i](feat, avg_feat))
+ reg_xy, reg_wh = paddle.split(reg_dist, 2, axis=1)
+ reg_xy = reg_xy * stride
+ reg_wh = (F.elu(reg_wh) + 1.) * stride
+ reg_angle = self.pred_angle[i](self.stem_angle[i](feat, avg_feat))
+ reg_angle = self.angle_proj_conv(F.softmax(reg_angle, axis=1))
+ reg_box = paddle.concat([reg_xy, reg_wh, reg_angle], axis=1)
+ # cls and reg
+ cls_score = F.sigmoid(cls_logit)
+ cls_score_list.append(cls_score.reshape([b, self.num_classes, l]))
+ reg_box_list.append(reg_box.reshape([b, 5, l]))
+
+ cls_score_list = paddle.concat(cls_score_list, axis=-1)
+ reg_box_list = paddle.concat(reg_box_list, axis=-1).transpose([0, 2, 1])
+ reg_xy, reg_wha = paddle.split(reg_box_list, [2, 3], axis=-1)
+ reg_xy = reg_xy + anchor_points
+ reg_box_list = paddle.concat([reg_xy, reg_wha], axis=-1)
+ return cls_score_list, reg_box_list
+
+ def _bbox_decode(self, points, pred_dist, pred_angle, stride_tensor):
+ # predict vector to x, y, w, h, angle
+ b, l = pred_angle.shape[:2]
+ xy, wh = paddle.split(pred_dist, 2, axis=-1)
+ xy = xy * stride_tensor + points
+ wh = (F.elu(wh) + 1.) * stride_tensor
+ angle = F.softmax(pred_angle.reshape([b, l, 1, self.angle_max + 1
+ ])).matmul(self.angle_proj)
+ return paddle.concat([xy, wh, angle], axis=-1)
+
+ def get_loss(self, head_outs, gt_meta):
+ pred_scores, pred_dist, pred_angle, \
+ anchor_points, num_anchors_list, stride_tensor = head_outs
+ # decode distance + angle predictions into rotated boxes, [B, N, 5]
+ pred_bboxes = self._bbox_decode(anchor_points, pred_dist, pred_angle,
+ stride_tensor)
+ gt_labels = gt_meta['gt_class']
+ # [B, N, 5]
+ gt_bboxes = gt_meta['gt_rbox']
+ pad_gt_mask = gt_meta['pad_gt_mask']
+ # label assignment
+ if gt_meta['epoch_id'] < self.static_assigner_epoch:
+ assigned_labels, assigned_bboxes, assigned_scores = \
+ self.static_assigner(
+ anchor_points,
+ stride_tensor,
+ num_anchors_list,
+ gt_labels,
+ gt_meta['gt_bbox'],
+ gt_bboxes,
+ pad_gt_mask,
+ self.num_classes,
+ pred_bboxes.detach()
+ )
+ else:
+ assigned_labels, assigned_bboxes, assigned_scores = \
+ self.assigner(
+ pred_scores.detach(),
+ pred_bboxes.detach(),
+ anchor_points,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index=self.num_classes)
+ alpha_l = -1
+ # cls loss
+ if self.use_varifocal_loss:
+ one_hot_label = F.one_hot(assigned_labels,
+ self.num_classes + 1)[..., :-1]
+ loss_cls = self._varifocal_loss(pred_scores, assigned_scores,
+ one_hot_label)
+ else:
+ loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l)
+
+ assigned_scores_sum = assigned_scores.sum()
+ if paddle.distributed.get_world_size() > 1:
+ paddle.distributed.all_reduce(assigned_scores_sum)
+ assigned_scores_sum = paddle.clip(
+ assigned_scores_sum / paddle.distributed.get_world_size(),
+ min=1.)
+ else:
+ assigned_scores_sum = paddle.clip(assigned_scores_sum, min=1.)
+ loss_cls /= assigned_scores_sum
+
+ loss_iou, loss_dfl = self._bbox_loss(pred_angle, pred_bboxes,
+ anchor_points, assigned_labels,
+ assigned_bboxes, assigned_scores,
+ assigned_scores_sum, stride_tensor)
+
+ loss = self.loss_weight['class'] * loss_cls + \
+ self.loss_weight['iou'] * loss_iou + \
+ self.loss_weight['dfl'] * loss_dfl
+ out_dict = {
+ 'loss': loss,
+ 'loss_cls': loss_cls,
+ 'loss_iou': loss_iou,
+ 'loss_dfl': loss_dfl
+ }
+ return out_dict
+
+ @staticmethod
+ def _focal_loss(score, label, alpha=0.25, gamma=2.0):
+ weight = (score - label).pow(gamma)
+ if alpha > 0:
+ alpha_t = alpha * label + (1 - alpha) * (1 - label)
+ weight *= alpha_t
+ loss = F.binary_cross_entropy(
+ score, label, weight=weight, reduction='sum')
+ return loss
+
+ @staticmethod
+ def _varifocal_loss(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
+ weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
+ loss = F.binary_cross_entropy(
+ pred_score, gt_score, weight=weight, reduction='sum')
+ return loss
+
+ @staticmethod
+ def _df_loss(pred_dist, target):
+ target_left = paddle.cast(target, 'int64')
+ target_right = target_left + 1
+ weight_left = target_right.astype('float32') - target
+ weight_right = 1 - weight_left
+ loss_left = F.cross_entropy(
+ pred_dist, target_left, reduction='none') * weight_left
+ loss_right = F.cross_entropy(
+ pred_dist, target_right, reduction='none') * weight_right
+ return (loss_left + loss_right).mean(-1, keepdim=True)
+
+ def _bbox_loss(self, pred_angle, pred_bboxes, anchor_points,
+ assigned_labels, assigned_bboxes, assigned_scores,
+ assigned_scores_sum, stride_tensor):
+ # select positive samples mask
+ mask_positive = (assigned_labels != self.num_classes)
+ num_pos = mask_positive.sum()
+ # pos/neg loss
+ if num_pos > 0:
+ # iou
+ bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 5])
+ pred_bboxes_pos = paddle.masked_select(pred_bboxes,
+ bbox_mask).reshape([-1, 5])
+ assigned_bboxes_pos = paddle.masked_select(
+ assigned_bboxes, bbox_mask).reshape([-1, 5])
+ bbox_weight = paddle.masked_select(
+ assigned_scores.sum(-1), mask_positive).reshape([-1])
+
+ loss_iou = self.iou_loss(pred_bboxes_pos,
+ assigned_bboxes_pos) * bbox_weight
+ loss_iou = loss_iou.sum() / assigned_scores_sum
+
+ # dfl
+ angle_mask = mask_positive.unsqueeze(-1).tile(
+ [1, 1, self.angle_max + 1])
+ pred_angle_pos = paddle.masked_select(
+ pred_angle, angle_mask).reshape([-1, self.angle_max + 1])
+ assigned_angle_pos = (
+ assigned_bboxes_pos[:, 4] /
+ self.half_pi_bin).clip(0, self.angle_max - 0.01)
+ loss_dfl = self._df_loss(pred_angle_pos, assigned_angle_pos)
+ else:
+ loss_iou = pred_bboxes.sum() * 0.
+ loss_dfl = paddle.zeros([1])
+
+ return loss_iou, loss_dfl
+
+ def _box2corners(self, pred_bboxes):
+ """ convert (x, y, w, h, angle) to (x1, y1, x2, y2, x3, y3, x4, y4)
+
+ Args:
+ pred_bboxes (Tensor): [B, N, 5]
+
+ Returns:
+ polys (Tensor): [B, N, 8]
+ """
+ x, y, w, h, angle = paddle.split(pred_bboxes, 5, axis=-1)
+ cos_a_half = paddle.cos(angle) * 0.5
+ sin_a_half = paddle.sin(angle) * 0.5
+ w_x = cos_a_half * w
+ w_y = sin_a_half * w
+ h_x = -sin_a_half * h
+ h_y = cos_a_half * h
+ return paddle.concat(
+ [
+ x + w_x + h_x, y + w_y + h_y, x - w_x + h_x, y - w_y + h_y,
+ x - w_x - h_x, y - w_y - h_y, x + w_x - h_x, y + w_y - h_y
+ ],
+ axis=-1)
+
+ def post_process(self, head_outs, scale_factor):
+ pred_scores, pred_bboxes = head_outs
+ # [B, N, 5] -> [B, N, 8]
+ pred_bboxes = self._box2corners(pred_bboxes)
+ # scale bbox to origin
+ scale_y, scale_x = paddle.split(scale_factor, 2, axis=-1)
+ scale_factor = paddle.concat(
+ [
+ scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
+ scale_y
+ ],
+ axis=-1).reshape([-1, 1, 8])
+ pred_bboxes /= scale_factor
+ bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
+ return bbox_pred, bbox_num
diff --git a/ppdet/modeling/initializer.py b/ppdet/modeling/initializer.py
index b482f133dd9ac1e2568f5c971f004117c56a5368..758eed240eae4497e14b7fe1cb9e10aca702eb53 100644
--- a/ppdet/modeling/initializer.py
+++ b/ppdet/modeling/initializer.py
@@ -118,6 +118,12 @@ def zeros_(tensor):
return _no_grad_fill_(tensor, 0)
+def vector_(tensor, vector):
+ with paddle.no_grad():
+ tensor.set_value(paddle.to_tensor(vector, dtype=tensor.dtype))
+ return tensor
+
+
def _calculate_fan_in_and_fan_out(tensor, reverse=False):
"""
Calculate (fan_in, _fan_out) for tensor
diff --git a/ppdet/modeling/necks/custom_pan.py b/ppdet/modeling/necks/custom_pan.py
index 08de226de5bed74d7acb2ca62b2b74599915482a..bb7123c66ff854eb5712bb1bfee254e8a8677b00 100644
--- a/ppdet/modeling/necks/custom_pan.py
+++ b/ppdet/modeling/necks/custom_pan.py
@@ -61,7 +61,14 @@ class SPP(nn.Layer):
class CSPStage(nn.Layer):
- def __init__(self, block_fn, ch_in, ch_out, n, act='swish', spp=False):
+ def __init__(self,
+ block_fn,
+ ch_in,
+ ch_out,
+ n,
+ act='swish',
+ spp=False,
+ use_alpha=False):
super(CSPStage, self).__init__()
ch_mid = int(ch_out // 2)
@@ -72,7 +79,11 @@ class CSPStage(nn.Layer):
for i in range(n):
self.convs.add_sublayer(
str(i),
- eval(block_fn)(next_ch_in, ch_mid, act=act, shortcut=False))
+ eval(block_fn)(next_ch_in,
+ ch_mid,
+ act=act,
+ shortcut=False,
+ use_alpha=use_alpha))
if i == (n - 1) // 2 and spp:
self.convs.add_sublayer(
'spp', SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13], act=act))
@@ -109,6 +120,7 @@ class CustomCSPPAN(nn.Layer):
data_format='NCHW',
width_mult=1.0,
depth_mult=1.0,
+ use_alpha=False,
trt=False):
super(CustomCSPPAN, self).__init__()
@@ -136,7 +148,8 @@ class CustomCSPPAN(nn.Layer):
ch_out,
block_num,
act=act,
- spp=(spp and i == 0)))
+ spp=(spp and i == 0),
+ use_alpha=use_alpha))
if drop_block:
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
@@ -181,7 +194,8 @@ class CustomCSPPAN(nn.Layer):
ch_out,
block_num,
act=act,
- spp=False))
+ spp=False,
+ use_alpha=use_alpha))
if drop_block:
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index fb9d98cf0f35458eb2af063487b7664a3fd8c2cc..d9a1192d7fb93ef855d06cf8fbebd688e21a7317 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -26,18 +26,9 @@ from paddle import in_dynamic_mode
from paddle.common_ops_import import Variable, LayerHelper, check_variable_and_dtype, check_type, check_dtype
__all__ = [
- 'prior_box',
- 'generate_proposals',
- 'box_coder',
- 'multiclass_nms',
- 'distribute_fpn_proposals',
- 'matrix_nms',
- 'batch_norm',
- 'mish',
- 'silu',
- 'swish',
- 'identity',
- 'anchor_generator'
+ 'prior_box', 'generate_proposals', 'box_coder', 'multiclass_nms',
+ 'distribute_fpn_proposals', 'matrix_nms', 'batch_norm', 'mish', 'silu',
+ 'swish', 'identity', 'anchor_generator'
]
@@ -118,6 +109,7 @@ def batch_norm(ch,
return norm_layer
+
@paddle.jit.not_to_static
def anchor_generator(input,
anchor_sizes=None,
diff --git a/ppdet/modeling/rbox_utils.py b/ppdet/modeling/rbox_utils.py
index bde5320cb74ed85451b17a84016f314ac07398a7..a5f19a2949d9f46b05ff94e5534807dabc46600d 100644
--- a/ppdet/modeling/rbox_utils.py
+++ b/ppdet/modeling/rbox_utils.py
@@ -239,3 +239,57 @@ def check_points_in_polys(points, polys):
is_in_polys = (ap_dot_ab >= 0) & (ap_dot_ab <= norm_ab) & (
ap_dot_ad >= 0) & (ap_dot_ad <= norm_ad)
return is_in_polys
+
+
+def check_points_in_rotated_boxes(points, boxes):
+ """Check whether point is in rotated boxes
+
+ Args:
+ points (tensor): (1, L, 2) anchor points
+        boxes (tensor): [B, N, 5] gt_bboxes (rotated boxes; converted to
+            corner points via box2corners before the point-in-box test)
+
+ Returns:
+ is_in_box (tensor): (B, N, L)
+
+ """
+ # [B, N, 5] -> [B, N, 4, 2]
+ corners = box2corners(boxes)
+ # [1, L, 2] -> [1, 1, L, 2]
+ points = points.unsqueeze(0)
+    # split [B, N, 4, 2] corners into four [B, N, 1, 2] tensors a, b, c, d
+ a, b, c, d = corners.split(4, axis=2)
+ ab = b - a
+ ad = d - a
+ # [B, N, L, 2]
+ ap = points - a
+    # [B, N, 1]; broadcasts over L in the comparisons below
+ norm_ab = paddle.sum(ab * ab, axis=-1)
+    # [B, N, 1]; broadcasts over L in the comparisons below
+ norm_ad = paddle.sum(ad * ad, axis=-1)
+ # [B, N, L] dot product
+ ap_dot_ab = paddle.sum(ap * ab, axis=-1)
+ # [B, N, L] dot product
+ ap_dot_ad = paddle.sum(ap * ad, axis=-1)
+    # [B, N, L] point lies inside iff 0 <= ap.ab <= |ab|^2 and 0 <= ap.ad <= |ad|^2
+ is_in_box = (ap_dot_ab >= 0) & (ap_dot_ab <= norm_ab) & (ap_dot_ad >= 0) & (
+ ap_dot_ad <= norm_ad)
+ return is_in_box
+
+
+def rotated_iou_similarity(box1, box2, eps=1e-9, func=''):
+ """Calculate iou of box1 and box2
+
+ Args:
+ box1 (Tensor): box with the shape [N, M1, 5]
+ box2 (Tensor): box with the shape [N, M2, 5]
+
+ Return:
+ iou (Tensor): iou between box1 and box2 with the shape [N, M1, M2]
+ """
+ from ext_op import rbox_iou
+ rotated_ious = []
+ for b1, b2 in zip(box1, box2):
+ rotated_ious.append(rbox_iou(b1, b2))
+
+ return paddle.stack(rotated_ious, axis=0)