diff --git a/configs/deformable_detr/README.md b/configs/deformable_detr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6b24c4ffab67073df5058c1d76f82bbdf835d1b0 --- /dev/null +++ b/configs/deformable_detr/README.md @@ -0,0 +1,30 @@ +# Deformable DETR + +## Introduction + + +Deformable DETR is an object detection model based on DETR. This is a reproduction of the model described in the paper. + + +## Model Zoo + +| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download | +|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | Deformable DETR | 2 | --- | 44.1 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) | + +**Notes:** + +- Deformable DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`. +- Deformable DETR is trained for 50 epochs on 8 GPUs. 
+ +## Citations +``` +@inproceedings{ +zhu2021deformable, +title={Deformable DETR: Deformable Transformers for End-to-End Object Detection}, +author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai}, +booktitle={International Conference on Learning Representations}, +year={2021}, +url={https://openreview.net/forum?id=gZ9hCDWe6ke} +} +``` diff --git a/configs/deformable_detr/_base_/deformable_detr_r50.yml b/configs/deformable_detr/_base_/deformable_detr_r50.yml new file mode 100644 index 0000000000000000000000000000000000000000..641129a6e519dd234d1a418d702f31bd97e6365a --- /dev/null +++ b/configs/deformable_detr/_base_/deformable_detr_r50.yml @@ -0,0 +1,48 @@ +architecture: DETR +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams +hidden_dim: 256 +use_focal_loss: True + + +DETR: + backbone: ResNet + transformer: DeformableTransformer + detr_head: DeformableDETRHead + post_process: DETRBBoxPostProcess + + +ResNet: + # index 0 stands for res2 + depth: 50 + norm_type: bn + freeze_at: 0 + return_idx: [1, 2, 3] + lr_mult_list: [0.0, 0.1, 0.1, 0.1] + num_stages: 4 + + +DeformableTransformer: + num_queries: 300 + position_embed_type: sine + nhead: 8 + num_encoder_layers: 6 + num_decoder_layers: 6 + dim_feedforward: 1024 + dropout: 0.1 + activation: relu + num_feature_levels: 4 + num_encoder_points: 4 + num_decoder_points: 4 + + +DeformableDETRHead: + num_mlp_layers: 3 + + +DETRLoss: + loss_coeff: {class: 2, bbox: 5, giou: 2, mask: 1, dice: 1} + aux_loss: True + + +HungarianMatcher: + matcher_coeff: {class: 2, bbox: 5, giou: 2} diff --git a/configs/deformable_detr/_base_/deformable_detr_reader.yml b/configs/deformable_detr/_base_/deformable_detr_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a7199296ae2f148c718e222002674093a29511f --- /dev/null +++ b/configs/deformable_detr/_base_/deformable_detr_reader.yml @@ -0,0 +1,49 @@ +worker_num: 0 +TrainReader: 
+ sample_transforms: + - Decode: {} + - RandomFlip: {prob: 0.5} + - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ], + transforms2: [ + RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] }, + RandomSizeCrop: { min_size: 384, max_size: 600 }, + RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ] + } + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - NormalizeBox: {} + - BboxXYXY2XYWH: {} + - Permute: {} + batch_transforms: + - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true} + batch_size: 2 + shuffle: true + drop_last: true + collate_batch: false + use_shared_memory: false + + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true} + batch_size: 1 + shuffle: false + drop_last: false + drop_empty: false + + +TestReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true} + batch_size: 1 + shuffle: false + drop_last: false diff --git a/configs/deformable_detr/_base_/deformable_optimizer_1x.yml b/configs/deformable_detr/_base_/deformable_optimizer_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..c068f4de493fabb52fac94d3d55c8b2b04efd850 --- /dev/null +++ b/configs/deformable_detr/_base_/deformable_optimizer_1x.yml @@ -0,0 +1,16 @@ +epoch: 50 + +LearningRate: + base_lr: 0.0002 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [40] + use_warmup: 
false + +OptimizerBuilder: + clip_grad_by_norm: 0.1 + regularizer: false + optimizer: + type: AdamW + weight_decay: 0.0001 diff --git a/configs/deformable_detr/deformable_detr_r50_1x_coco.yml b/configs/deformable_detr/deformable_detr_r50_1x_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..f924c4e2e23b4a5c8b6eaad20ad340b761a476af --- /dev/null +++ b/configs/deformable_detr/deformable_detr_r50_1x_coco.yml @@ -0,0 +1,8 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + '_base_/deformable_optimizer_1x.yml', + '_base_/deformable_detr_r50.yml', + '_base_/deformable_detr_reader.yml', +] +weights: output/deformable_detr_r50_1x_coco/model_final diff --git a/configs/detr/README.md b/configs/detr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..26f83a3fd2f3dc20bd32310c9cb441da4ee278ff --- /dev/null +++ b/configs/detr/README.md @@ -0,0 +1,33 @@ +# DETR + +## Introduction + + +DETR is an object detection model based on the Transformer architecture. This is a reproduction of the model described in the paper. + + +## Model Zoo + +| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download | +|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | DETR | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) | + +**Notes:** + +- DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`. +- DETR is trained for 500 epochs on 8 GPUs. 
+ +## Citations +``` +@inproceedings{detr, + author = {Nicolas Carion and + Francisco Massa and + Gabriel Synnaeve and + Nicolas Usunier and + Alexander Kirillov and + Sergey Zagoruyko}, + title = {End-to-End Object Detection with Transformers}, + booktitle = {ECCV}, + year = {2020} +} +``` diff --git a/configs/detr/_base_/detr_r50.yml b/configs/detr/_base_/detr_r50.yml index d234fadd63e58ae208db7e569d35c44d00185f64..5006f11937c9a7c2566913a08144fbb6ee3d0efa 100644 --- a/configs/detr/_base_/detr_r50.yml +++ b/configs/detr/_base_/detr_r50.yml @@ -1,5 +1,5 @@ architecture: DETR -pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams hidden_dim: 256