diff --git a/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml b/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..558bb689dc1c880827b6e1b9d4c27cb71a606d06 --- /dev/null +++ b/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml @@ -0,0 +1,139 @@ +architecture: FasterRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar +weights: output/faster_rcnn_dcn_r101_vd_fpn_1x/model_final +metric: COCO +num_classes: 81 + +FasterRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 2000 + pre_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 1000 + pre_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + box_resolution: 7 + sampling_ratio: 2 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + # batch size per device + batch_size: 2 + dataset: + dataset_dir: dataset/coco + image_dir: train2017 + annotation: annotations/instances_train2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml b/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..51f5a7461a379011df1203985cf065a84eaaccaf --- /dev/null +++ b/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml @@ -0,0 +1,138 @@ +architecture: FasterRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +use_gpu: true +snapshot_iter: 10000 +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: 
https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +metric: COCO +weights: output/faster_rcnn_dcn_r50_fpn_1x/model_final +num_classes: 81 + +FasterRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 50 + norm_type: bn + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + dcn_v2_stages: [3, 4, 5] + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 7 + sampling_ratio: 2 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_lo: 0.0 + bg_thresh_hi: 0.5 + fg_fraction: 0.25 + fg_thresh: 0.5 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 diff --git a/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml new file mode 100644 index 0000000000000000000000000000000000000000..172b4e781068aacac0cca664786b970904ed26ad --- /dev/null +++ b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml @@ -0,0 +1,139 @@ +architecture: FasterRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 180000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar +weights: output/faster_rcnn_dcn_r50_vd_fpn_2x/model_final +metric: COCO +num_classes: 81 + +FasterRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + 
anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 2000 + pre_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 1000 + pre_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + box_resolution: 7 + sampling_ratio: 2 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + # batch size per device + batch_size: 2 + dataset: + dataset_dir: dataset/coco + image_dir: train2017 + annotation: annotations/instances_train2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..3a04619645dea790bea5ba6f3d23accfa9d64460 --- /dev/null +++ b/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml @@ -0,0 +1,143 @@ +architecture: FasterRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 180000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar +weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final +metric: COCO +num_classes: 81 + +FasterRCNN: + backbone: ResNeXt + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNeXt: + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + group_width: 4 + groups: 64 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 2000 + pre_nms_top_n: 2000 + 
test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + post_nms_top_n: 1000 + pre_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + box_resolution: 7 + sampling_ratio: 2 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + # batch size per device + batch_size: 1 + dataset: + dataset_dir: dataset/coco + image_dir: train2017 + annotation: annotations/instances_train2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + shuffle: true + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + shuffle: false diff --git a/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml b/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..dc650f33f5bacb6390abfb32e54892d97c4cca37 --- /dev/null +++ b/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml @@ -0,0 +1,146 @@ +architecture: MaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +max_iters: 180000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar +weights: output/mask_rcnn_dcn_r101_vd_fpn_1x/model_final +metric: COCO +num_classes: 81 + +MaskRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + sampling_ratio: 2 + box_resolution: 7 + mask_resolution: 14 + +MaskHead: + dilation: 1 + num_chan_reduced: 256 + num_convs: 4 + resolution: 28 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +MaskAssigner: + resolution: 28 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + 
nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml b/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..28c4b44d93d157e5706a9423f50426d585063cf7 --- /dev/null +++ b/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml @@ -0,0 +1,145 @@ +architecture: MaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +use_gpu: true +max_iters: 180000 +snapshot_iter: 10000 +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +metric: COCO +weights: output/mask_rcnn_dcn_r50_fpn_1x/model_final/ +num_classes: 81 + +MaskRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: bn + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + sampling_ratio: 2 + box_resolution: 7 + mask_resolution: 14 + +MaskHead: + dilation: 1 + num_chan_reduced: 256 + num_convs: 4 + resolution: 28 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +MaskAssigner: + resolution: 28 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + 
pad_to_stride: 32 + num_workers: 2 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml b/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml new file mode 100644 index 0000000000000000000000000000000000000000..04668037d2169288a32ac2853f4b631e97756eeb --- /dev/null +++ b/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml @@ -0,0 +1,147 @@ +architecture: MaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +use_gpu: true +max_iters: 360000 +snapshot_iter: 10000 +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar +metric: COCO +weights: output/mask_rcnn_dcn_r50_vd_fpn_2x/model_final/ +num_classes: 81 + +MaskRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNet: + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + box_resolution: 7 + sampling_ratio: 2 + mask_resolution: 14 + +MaskHead: + dilation: 1 + num_chan_reduced: 256 + num_convs: 4 + resolution: 28 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +MaskAssigner: + resolution: 28 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [240000, 320000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + # batch size per device + batch_size: 1 + dataset: + dataset_dir: dataset/coco + image_dir: train2017 + annotation: annotations/instances_train2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml 
b/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..740ca65ec6bd6e672683583832450a1e63b75069 --- /dev/null +++ b/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml @@ -0,0 +1,148 @@ +architecture: MaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +max_iters: 180000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar +weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final +metric: COCO +num_classes: 81 + +MaskRCNN: + backbone: ResNeXt + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: BBoxHead + bbox_assigner: BBoxAssigner + +ResNeXt: + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + group_width: 4 + groups: 64 + norm_type: bn + variant: d + dcn_v2_stages: [3, 4, 5] + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + sampling_ratio: 2 + box_resolution: 7 + mask_resolution: 14 + +MaskHead: + dilation: 1 + num_chan_reduced: 256 + num_convs: 4 + resolution: 28 + +BBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: 0.5 + bg_thresh_lo: 0.0 + fg_fraction: 0.25 + fg_thresh: 0.5 + +MaskAssigner: + resolution: 28 + +BBoxHead: + head: TwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +TwoFCHead: + num_chan: 1024 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index e2e9d7d962bb84408ab3fd1680bf703b735f936b..668860ac4d8189cfcc729ca09ab68289a82e6008 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -24,7 +24,7 @@ The backbone models pretrained on ImageNet are available. All backbone models are pretrained on standard ImageNet-1k dataset and can be downloaded [here](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#supported-models-and-performances). 
-- Notes: The ResNet50 model was trained with cosine LR decay schedule and can be downloaded [here](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar). +- **Notes:** The ResNet50 model was trained with a cosine LR decay schedule and can be downloaded [here](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar). ## Baselines @@ -58,6 +58,24 @@ The backbone models pretrained on ImageNet are available. All backbone models ar | SENet154-vd-FPN | Faster | 1 | 1.44x | 42.9 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | | SENet154-vd-FPN | Mask | 1 | 1.44x | 44.0 | 38.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | +### Deformable ConvNets v2 + +| Backbone | Type | Conv | Image/gpu | Lr schd | Box AP | Mask AP | Download | +| :------------------- | :------------- | :-----: |:--------: | :-----: | :----: | :-----: | :----------------------------------------------------------: | +| ResNet50-FPN | Faster | c3-c5 | 2 | 1x | 41.0 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet50-vd-FPN | Faster | c3-c5 | 2 | 2x | 42.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar) | +| ResNet101-vd-FPN | Faster | c3-c5 | 2 | 1x | 44.1 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | 45.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | +| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | 41.9 | 37.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 42.9 | 38.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | +| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 44.6 | 39.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | 46.2 | 40.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | + +#### Notes: +- Deformable ConvNets v2 (dcn_v2) is described in the paper [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). +- `c3-c5` means adding `dcn` in ResNet stages 3 through 5. +- Detailed configuration files can be found in [configs/dcn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/dcn). + ### Yolo v3 | Backbone | Size | Image/gpu | Lr schd | Box AP | Download | @@ -86,7 +104,7 @@ The backbone models pretrained on ImageNet are available. All backbone models ar | ResNet34 | 416 | 8 | 270e | 81.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | | ResNet34 | 320 | 8 | 270e | 80.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | -**NOTE**: Yolo v3 is trained in 8 GPU with total batch size as 64 and trained 270 epoches. Yolo v3 training data augmentations: mixup, +**Notes:** Yolo v3 is trained on 8 GPUs with a total batch size of 64 for 270 epochs. Yolo v3 training data augmentations: mixup, randomly color distortion, randomly cropping, randomly expansion, randomly interpolation method, randomly flippling.
Yolo v3 used randomly reshaped minibatch in training, inferences can be performed on different image sizes with the same model weights, and we provided evaluation results of image size 608/416/320 above. @@ -106,5 +124,5 @@ results of image size 608/416/320 above. | :----------- | :--: | :-----: | :-----: | :----: | :-------: | | MobileNet v1 | 300 | 32 | 120e | 73.13 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) | -**NOTE**: SSD is trained in 2 GPU with totoal batch size as 64 and trained 120 epoches. SSD training data augmentations: randomly color distortion, +**Notes:** SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. SSD training data augmentations: randomly color distortion, randomly cropping, randomly expansion, randomly flipping. diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 1b0dc31baf6a0b05ffb7c9cac44db276f1d2d8d3..5a98fb1ec48e1d2f8c8a50b4fb0211671dd6f164 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -56,6 +56,24 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 | SENet154-vd-FPN | Faster | 1 | 1.44x | 42.9 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | | SENet154-vd-FPN | Mask | 1 | 1.44x | 44.0 | 38.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | +### Deformable 卷积网络v2 + +| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 | Box AP | Mask AP | 下载 | +| :------------------- | :------------- | :-----: |:--------: | :-----: | :----: | :-----: | :----------------------------------------------------------: | +| ResNet50-FPN | Faster | c3-c5 | 2 | 1x | 41.0 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet50-vd-FPN | Faster | c3-c5 | 2 | 2x | 42.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar) | +| ResNet101-vd-FPN | Faster | c3-c5 | 2 | 1x | 44.1 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | 45.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | +| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | 41.9 | 37.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 42.9 | 38.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | +| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 44.6 | 39.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | 46.2 | 40.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | + +#### 注意事项: +- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). +- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
+- 详细的配置文件在[configs/dcn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/dcn) + ### Yolo v3 | 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 | Box AP | 下载 | diff --git a/ppdet/modeling/backbones/resnet.py b/ppdet/modeling/backbones/resnet.py index aaeb71e8068c5cbb1b1b4282d4e171259533eb0d..7685e4c90d989b1bcf42611803214beb5b2f2763 100644 --- a/ppdet/modeling/backbones/resnet.py +++ b/ppdet/modeling/backbones/resnet.py @@ -22,6 +22,7 @@ from paddle import fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.framework import Variable from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant from ppdet.core.workspace import register, serializable from numbers import Integral @@ -44,6 +45,7 @@ class ResNet(object): norm_decay (float): weight decay for normalization layer weights variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently feature_maps (list): index of stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 """ def __init__(self, @@ -53,7 +55,8 @@ class ResNet(object): freeze_norm=True, norm_decay=0., variant='b', - feature_maps=[2, 3, 4, 5]): + feature_maps=[2, 3, 4, 5], + dcn_v2_stages=[]): super(ResNet, self).__init__() if isinstance(feature_maps, Integral): @@ -74,6 +77,7 @@ class ResNet(object): self.variant = variant self._model_type = 'ResNet' self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages self.depth_cfg = { 18: ([2, 2, 2, 2], self.basicblock), 34: ([3, 4, 6, 3], self.basicblock), @@ -85,6 +89,19 @@ class ResNet(object): self._c1_out_chan_num = 64 self.na = NameAdapter(self) + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d(input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0)), + bias_attr=ParamAttr(initializer=Constant(0.0)), + act=act, + name=name) + return out + def _conv_norm(self, input, num_filters, @@ -92,18 +109,50 @@ class ResNet(object): stride=1, groups=1, act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') + name=None, + dcn_v2=False): + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=name + "_conv_offset") + offset_channel = filter_size ** 2 * 2 + mask_channel = filter_size ** 2 + offset, mask = fluid.layers.split( + input=offset_mask, + num_or_sections=[offset_channel, mask_channel], + dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + 
name=name + ".conv2d.output.1") bn_name = self.na.fix_conv_norm_name(name) @@ -168,7 +217,7 @@ class ResNet(object): else: return input - def bottleneck(self, input, num_filters, stride, is_first, name): + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): if self.variant == 'a': stride1, stride2 = stride, 1 else: @@ -192,7 +241,7 @@ class ResNet(object): [num_filters * expand, 1, 1, None, 1, conv_name3]] residual = input - for (c, k, s, act, g, _name) in conv_def: + for i, (c, k, s, act, g, _name) in enumerate(conv_def): residual = self._conv_norm( input=residual, num_filters=c, @@ -200,7 +249,8 @@ class ResNet(object): stride=s, act=act, groups=g, - name=_name) + name=_name, + dcn_v2=(i==1 and dcn_v2)) short = self._shortcut( input, num_filters * expand, @@ -214,7 +264,8 @@ class ResNet(object): return fluid.layers.elementwise_add( x=short, y=residual, act='relu', name=name + ".add.output.5") - def basicblock(self, input, num_filters, stride, is_first, name): + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." conv0 = self._conv_norm( input=input, num_filters=num_filters, @@ -248,6 +299,7 @@ class ResNet(object): ch_out = self.stage_filters[stage_num - 2] is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False # Make the layer name and parameter name consistent # with ImageNet pre-trained model conv = input @@ -260,7 +312,8 @@ class ResNet(object): num_filters=ch_out, stride=2 if i == 0 and stage_num != 2 else 1, is_first=is_first, - name=conv_name) + name=conv_name, + dcn_v2=dcn_v2) return conv def c1_stage(self, input): diff --git a/ppdet/modeling/backbones/resnext.py b/ppdet/modeling/backbones/resnext.py index 54c93078d0a1d585bb4ba5def255858551cd385f..d9d49e76152b59d2b39098f03232a15cf7719b0c 100644 --- a/ppdet/modeling/backbones/resnext.py +++ b/ppdet/modeling/backbones/resnext.py @@ -37,6 +37,7 @@ class ResNeXt(ResNet): norm_decay (float): weight decay for normalization layer weights variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently feature_maps (list): index of the stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 """ def __init__(self, @@ -48,7 +49,8 @@ class ResNeXt(ResNet): freeze_norm=True, norm_decay=True, variant='a', - feature_maps=[2, 3, 4, 5]): + feature_maps=[2, 3, 4, 5], + dcn_v2_stages=[]): assert depth in [50, 101, 152], "depth {} should be 50, 101 or 152" super(ResNeXt, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) @@ -61,6 +63,7 @@ class ResNeXt(ResNet): self.groups = groups self.group_width = group_width self._model_type = 'ResNeXt' + self.dcn_v2_stages = dcn_v2_stages @register diff --git a/ppdet/modeling/backbones/senet.py b/ppdet/modeling/backbones/senet.py index ea7755bb1c6c91fd900cd53fc1c09a26053c2a61..868f9ff886dbb8c150e2997b80450f4935478bc3 100644 --- a/ppdet/modeling/backbones/senet.py +++ b/ppdet/modeling/backbones/senet.py @@ -42,6 +42,7 @@ class SENet(ResNeXt): norm_decay (float): weight decay for normalization layer weights variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently feature_maps (list): index of the stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 """ def __init__(self, @@ -53,7 +54,8 @@ class SENet(ResNeXt): freeze_norm=True, norm_decay=0., variant='d', - feature_maps=[2, 3, 4, 5]): + 
feature_maps=[2, 3, 4, 5], + dcn_v2_stages=[]): super(SENet, self).__init__(depth, groups, group_width, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) @@ -64,6 +66,7 @@ class SENet(ResNeXt): self.reduction_ratio = 16 self._c1_out_chan_num = 128 self._model_type = 'SEResNeXt' + self.dcn_v2_stages = dcn_v2_stages def _squeeze_excitation(self, input, num_channels, name=None): pool = fluid.layers.pool2d(
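
Usage sketch (supplementary note, not part of the diff above): judging from the configs added in this change, enabling DCN v2 on an FPN-based detector only requires the new `dcn_v2_stages` key in the backbone section of a YAML config; the listed stages match the `c3-c5` convention used in the model zoo tables, and the offset/mask branch added by `_conv_offset` starts from zero-initialized weights. A minimal excerpt mirroring configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml:

```yaml
# Backbone excerpt; all keys below appear in the added configs.
# dcn_v2_stages switches the 3x3 convolutions in ResNet stages 3-5 to
# deformable conv v2 (dcn_v2); omit the key to keep plain convolutions.
ResNet:
  depth: 50
  norm_type: bn
  feature_maps: [2, 3, 4, 5]
  freeze_at: 2
  dcn_v2_stages: [3, 4, 5]
```

Training would then presumably be launched through the usual PaddleDetection entry point, e.g. `python tools/train.py -c configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml` (command assumed here, not shown in this diff).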