diff --git a/configs/sparse_rcnn/README.md b/configs/sparse_rcnn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5443b037f247938fc9a72194fff62c9a27cedc50
--- /dev/null
+++ b/configs/sparse_rcnn/README.md
@@ -0,0 +1,25 @@
+# Sparse R-CNN: End-to-End Object Detection with Learnable Proposals
+
+
+## Introduction
+Sparse R-CNN is a purely sparse method for object detection in images.
+
+
+## Model Zoo
+
+| Backbone | Proposals | lr schedule | Box AP | download | config |
+| :-------------- | :-----: | :------------: | :-----: | :-----: | :-----: |
+| ResNet50-FPN | 100 | 3x | 43.0 | [download](https://paddledet.bj.bcebos.com/models/sparse_rcnn_r50_fpn_3x_pro100_coco.pdparams) | [config](./sparse_rcnn_r50_fpn_3x_pro100_coco.yml) |
+| ResNet50-FPN | 300 | 3x | 44.6 | [download](https://paddledet.bj.bcebos.com/models/sparse_rcnn_r50_fpn_3x_pro300_coco.pdparams) | [config](./sparse_rcnn_r50_fpn_3x_pro300_coco.yml) |
+
+## Citations
+```
+@misc{sun2021sparse,
+      title={Sparse R-CNN: End-to-End Object Detection with Learnable Proposals},
+      author={Peize Sun and Rufeng Zhang and Yi Jiang and Tao Kong and Chenfeng Xu and Wei Zhan and Masayoshi Tomizuka and Lei Li and Zehuan Yuan and Changhu Wang and Ping Luo},
+      year={2021},
+      eprint={2011.12450},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
diff --git a/configs/sparse_rcnn/_base_/optimizer_3x.yml b/configs/sparse_rcnn/_base_/optimizer_3x.yml
new file mode 100644
index 0000000000000000000000000000000000000000..19e1037130158909632a4d6515f6adf53cf5ad3c
--- /dev/null
+++ b/configs/sparse_rcnn/_base_/optimizer_3x.yml
@@ -0,0 +1,18 @@
+# Optimizer and learning-rate schedule included via _BASE_ by the pro100 and pro300 configs.
+epoch: 36
+
+LearningRate:
+  base_lr: 0.000025
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [28, 34]
+  - !LinearWarmup
+    start_factor: 0.01
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 1.0
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
diff --git a/configs/sparse_rcnn/_base_/sparse_rcnn_r50_fpn.yml b/configs/sparse_rcnn/_base_/sparse_rcnn_r50_fpn.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9f7516fcd8652c866ad660f2f0afc9e36f1a6033
--- /dev/null
+++ b/configs/sparse_rcnn/_base_/sparse_rcnn_r50_fpn.yml
@@ -0,0 +1,45 @@
+# Sparse R-CNN model definition: ResNet-50 backbone, FPN neck, 100 proposals by default.
+architecture: SparseRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+SparseRCNN:
+  backbone: ResNet
+  neck: FPN
+  head: SparseRCNNHead
+  postprocess: SparsePostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+SparseRCNNHead:
+  head_hidden_dim: 256
+  head_dim_feedforward: 2048
+  nhead: 8
+  head_dropout: 0.0
+  head_cls: 1
+  head_reg: 3
+  head_dim_dynamic: 64
+  head_num_dynamic: 2
+  head_num_heads: 6
+  deep_supervision: true
+  num_proposals: 100
+  loss_func: SparseRCNNLoss
+
+SparseRCNNLoss:
+  losses: ["labels", "boxes"]
+  focal_loss_alpha: 0.25
+  focal_loss_gamma: 2.0
+  class_weight: 2.0
+  l1_weight: 5.0
+  giou_weight: 2.0
+
+SparsePostProcess:
+  num_proposals: 100
diff --git a/configs/sparse_rcnn/_base_/sparse_rcnn_reader.yml b/configs/sparse_rcnn/_base_/sparse_rcnn_reader.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f4226473d78168c407337ee156b25ab7b7462b67
--- /dev/null
+++ b/configs/sparse_rcnn/_base_/sparse_rcnn_reader.yml
@@ -0,0 +1,45 @@
+# Train / eval / test reader pipelines included via _BASE_ by the Sparse R-CNN configs.
+worker_num: 4
+use_process: true
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseRCNNTarget: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_process: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseRCNNTarget: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+  use_process: true
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseRCNNTarget: {train: false}
+  batch_size: 1
+  shuffle: false
diff --git a/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro100_coco.yml b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro100_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..495d6af390c5c71587c76d2a0e44d1d296ee919e
--- /dev/null
+++ b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro100_coco.yml
@@ -0,0 +1,11 @@
+# Sparse R-CNN, ResNet-50 + FPN, 100 proposals (the base-config default), COCO.
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/sparse_rcnn_r50_fpn.yml',
+  '_base_/optimizer_3x.yml',
+  '_base_/sparse_rcnn_reader.yml',
+]
+
+num_classes: 80
+weights: output/sparse_rcnn_r50_fpn_3x_pro100_coco/model_final  # directory named after this config file
diff --git a/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro300_coco.yml b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro300_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9268c5ffde14cbbbd5b6336f487e0be3f86bb910
--- /dev/null
+++ b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro300_coco.yml
@@ -0,0 +1,21 @@
+# Sparse R-CNN, ResNet-50 + FPN, 300 proposals, COCO.
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/sparse_rcnn_r50_fpn.yml',
+  '_base_/optimizer_3x.yml',
+  '_base_/sparse_rcnn_reader.yml',
+]
+
+num_classes: 80
+weights: output/sparse_rcnn_r50_fpn_3x_pro300_coco/model_final  # directory named after this config file
+
+snapshot_epoch: 1
+
+
+# Override the 100-proposal defaults from _base_/sparse_rcnn_r50_fpn.yml; both values are kept equal, as in the base config.
+SparseRCNNHead:
+  num_proposals: 300
+
+SparsePostProcess:
+  num_proposals: 300