diff --git a/dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml b/dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml index 8a8fb1ebb8f6d20c8e8fec681f7a7b0ce0d45069..5e380eb760a2324604dba563f56f2004f872189a 100644 --- a/dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml +++ b/dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml @@ -1,13 +1,13 @@ worker_num: 2 TrainReader: sample_transforms: - - DecodeOp: { } - - RandomFlipImage: {prob: 0.5} - - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - - Permute: {to_bgr: false, channel_first: true} + - DecodeOp: {} + - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlipOp: {prob: 0.5} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true} + - PadBatchOp: {pad_to_stride: 32, pad_gt: true} batch_size: 1 shuffle: true drop_last: true @@ -15,12 +15,12 @@ TrainReader: EvalReader: sample_transforms: - - DecodeOp: { } - - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - - ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - - PermuteOp: { } + - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatchOp: { pad_to_stride: 32, pad_gt: false } + - PadBatchOp: {pad_to_stride: 32, pad_gt: false} batch_size: 1 shuffle: false drop_last: false @@ -29,12 +29,12 @@ EvalReader: TestReader: sample_transforms: - - DecodeOp: { } - - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - - ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - - PermuteOp: { } + - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatchOp: { pad_to_stride: 32, pad_gt: false } + - PadBatchOp: {pad_to_stride: 32, pad_gt: false} batch_size: 1 shuffle: false drop_last: false diff --git a/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50.yml b/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50.yml index be0192c62afb48361f04b86ee98a722d6d7c94a2..5ee7bffb4fda5925ca31a7b869e117f7d08cf826 100644 --- a/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50.yml +++ b/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50.yml @@ -2,12 +2,7 @@ architecture: FasterRCNN pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar load_static_weights: True -# Model Achitecture FasterRCNN: - # model anchor info flow - anchor: Anchor - proposal: Proposal - # model feat info flow backbone: ResNet rpn_head: RPNHead bbox_head: BBoxHead @@ -24,70 +19,50 @@ ResNet: num_stages: 3 RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 1024 - feat_out: 1024 - anchor_per_position: 15 - rpn_channel: 1024 - -Anchor: anchor_generator: - name: AnchorGeneratorRPN - anchor_sizes: [32, 64, 128, 256, 512] aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_target_generator: - name: 
AnchorTargetGeneratorRPN + anchor_sizes: [32, 64, 128, 256, 512] + strides: [16] + rpn_target_assign: batch_size_per_im: 256 fg_fraction: 0.5 negative_overlap: 0.3 positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator + use_random: True + train_proposal: min_size: 0.0 nms_thresh: 0.7 - train_pre_nms_top_n: 12000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 6000 - infer_post_nms_top_n: 1000 - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 + pre_nms_top_n: 12000 + post_nms_top_n: 2000 + topk_after_collect: True + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 6000 + post_nms_top_n: 1000 + BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIAlign - resolution: 14 - sampling_ratio: 0 - start_level: 0 - end_level: 0 - head_feat: - name: Res5Head - feat_in: 1024 - feat_out: 512 + head: Res5Head + roi_extractor: + resolution: 14 + sampling_ratio: 0 + aligned: True + bbox_assigner: BBoxAssigner with_pool: true - in_feat: 2048 +BBoxAssigner: + batch_size_per_im: 512 + bg_thresh: [0.5,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + use_random: True BBoxPostProcess: - decode: - name: RCNNBox - num_classes: 81 - batch_size: 1 + decode: RCNNBox nms: name: MultiClassNMS keep_top_k: 100 score_threshold: 0.05 nms_threshold: 0.5 + normalized: true diff --git a/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml b/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml index c831f7f966438b7194a9ba435e03282b6fd41e57..881485798c04bdefe76217154b4d4143ca3c1b4e 100644 --- a/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml +++ b/dygraph/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml @@ -2,12 +2,7 @@ architecture: FasterRCNN pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar load_static_weights: True -# Model Achitecture FasterRCNN: - # model anchor info flow - anchor: Anchor - proposal: Proposal - # model feat info flow backbone: ResNet neck: FPN rpn_head: RPNHead @@ -25,72 +20,56 @@ ResNet: num_stages: 4 FPN: - in_channels: [256, 512, 1024, 2048] out_channel: 256 - min_level: 0 - max_level: 4 - spatial_scale: [0.25, 0.125, 0.0625, 0.03125] RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 256 - feat_out: 256 - anchor_per_position: 3 - rpn_channel: 256 - -Anchor: anchor_generator: - name: AnchorGeneratorRPN aspect_ratios: [0.5, 1.0, 2.0] - anchor_start_size: 32 - stride: [4., 4.] 
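The removed `anchor_per_position` fields (15 for the single-level C4 model above, 3 per level for FPN) are no longer spelled out in the config: they follow directly from `anchor_sizes` and `aspect_ratios`. A minimal sketch of that arithmetic, using the values from these configs:

```python
# Anchors per position = len(sizes at that level) * len(aspect_ratios).
aspect_ratios = [0.5, 1.0, 2.0]

# C4 model: all five sizes live on a single level (stride 16).
c4_anchor_sizes = [32, 64, 128, 256, 512]
print(len(c4_anchor_sizes) * len(aspect_ratios))  # 15, the old anchor_per_position

# FPN model: one size per level, on strides [4, 8, 16, 32, 64].
fpn_anchor_sizes = [[32], [64], [128], [256], [512]]
for sizes in fpn_anchor_sizes:
    print(len(sizes) * len(aspect_ratios))  # 3 per position on each level
```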
- anchor_target_generator: - name: AnchorTargetGeneratorRPN + anchor_sizes: [[32], [64], [128], [256], [512]] + strides: [4, 8, 16, 32, 64] + rpn_target_assign: batch_size_per_im: 256 fg_fraction: 0.5 negative_overlap: 0.3 positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator + use_random: True + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 1000 + topk_after_collect: True + test_proposal: min_size: 0.0 nms_thresh: 0.7 - train_pre_nms_top_n: 2000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 1000 - infer_post_nms_top_n: 1000 - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIAlign - resolution: 7 - sampling_ratio: 2 - head_feat: - name: TwoFCHead - in_dim: 256 - mlp_dim: 1024 - in_feat: 1024 + head: TwoFCHead + roi_extractor: + resolution: 7 + sampling_ratio: 0 + aligned: True + bbox_assigner: BBoxAssigner + +BBoxAssigner: + batch_size_per_im: 512 + bg_thresh: [0.5,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + use_random: True + +TwoFCHead: + mlp_dim: 1024 + BBoxPostProcess: - decode: - name: RCNNBox - num_classes: 81 - batch_size: 1 + decode: RCNNBox nms: name: MultiClassNMS keep_top_k: 100 score_threshold: 0.05 nms_threshold: 0.5 + normalized: true diff --git a/dygraph/configs/faster_rcnn/_base_/faster_reader.yml b/dygraph/configs/faster_rcnn/_base_/faster_reader.yml index 5131d0360d4d1b833f85fed45e1b38d0723bb2f4..4cad5abdf9a72290c4be9179d3bec8fe53d5665d 100644 --- a/dygraph/configs/faster_rcnn/_base_/faster_reader.yml +++ b/dygraph/configs/faster_rcnn/_base_/faster_reader.yml @@ -1,13 +1,13 @@ worker_num: 2 TrainReader: sample_transforms: - - DecodeOp: { } - - RandomFlipImage: {prob: 0.5} - - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - - Permute: {to_bgr: false, channel_first: true} + - DecodeOp: {} + - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlipOp: {prob: 0.5} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatch: {pad_to_stride: -1, use_padded_im_info: false, pad_gt: true} + - PadBatchOp: {pad_to_stride: -1., pad_gt: true} batch_size: 1 shuffle: true drop_last: true @@ -15,12 +15,12 @@ TrainReader: EvalReader: sample_transforms: - - DecodeOp: { } - - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - - ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - - PermuteOp: { } + - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatchOp: { pad_to_stride: -1, pad_gt: false } + - PadBatchOp: {pad_to_stride: -1., pad_gt: false} batch_size: 1 shuffle: false drop_last: false @@ -29,12 +29,12 @@ EvalReader: TestReader: sample_transforms: - - DecodeOp: { } - - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - - 
ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - - PermuteOp: { } + - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatchOp: { pad_to_stride: -1, pad_gt: false } + - PadBatchOp: {pad_to_stride: -1., pad_gt: false} batch_size: 1 shuffle: false drop_last: false diff --git a/dygraph/configs/faster_rcnn/_base_/optimizer_1x.yml b/dygraph/configs/faster_rcnn/_base_/optimizer_1x.yml index d28b0947b9fb6567a70f11acfe6663dac89b0771..4caaa63bda15917137a9ac22b736ae83c3d04856 100644 --- a/dygraph/configs/faster_rcnn/_base_/optimizer_1x.yml +++ b/dygraph/configs/faster_rcnn/_base_/optimizer_1x.yml @@ -7,8 +7,8 @@ LearningRate: gamma: 0.1 milestones: [8, 11] - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 + start_factor: 0.1 + steps: 1000 OptimizerBuilder: optimizer: diff --git a/dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml b/dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml index 1d73c7f31ea6d1e0babb548b91604c1d3492dd83..a3d663558132189ba2c46e9b44fd21cc92e03510 100644 --- a/dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml +++ b/dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml @@ -2,22 +2,21 @@ worker_num: 2 TrainReader: sample_transforms: - DecodeOp: {} - - RandomFlipImage: {prob: 0.5, is_mask_flip: true} - - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - - Permute: {to_bgr: false, channel_first: true} + - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlipOp: {prob: 0.5, is_mask_flip: true} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true} + - PadBatchOp: {pad_to_stride: 32, pad_gt: true} batch_size: 1 shuffle: true drop_last: true - EvalReader: sample_transforms: - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True} - PermuteOp: {} batch_transforms: - PadBatchOp: {pad_to_stride: 32, pad_gt: false} @@ -30,8 +29,8 @@ EvalReader: TestReader: sample_transforms: - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True} - PermuteOp: {} batch_transforms: - PadBatchOp: {pad_to_stride: 32, pad_gt: false} diff --git a/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50.yml b/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50.yml index eac16e10590a14fbd1f867890fa468abea32e41e..75191261d7298e3f67b2940e3ff3032a8fe2dfb7 100644 --- a/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50.yml +++ b/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50.yml @@ -2,13 +2,7 @@ architecture: MaskRCNN pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar load_static_weights: True -# Model Achitecture MaskRCNN: - # model anchor info flow - anchor: Anchor - proposal: Proposal - mask: Mask - # model feat info flow 
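For the `LinearWarmup` change in the optimizer hunk above (`start_factor` 0.3333 to 0.1, `steps` 500 to 1000), the warmup factor ramps linearly from `start_factor` to 1 over `steps` iterations. A hedged sketch of the resulting schedule, assuming a base learning rate of 0.01 (the base LR is set elsewhere in the optimizer config, outside this hunk):

```python
# Sketch of a linear warmup ramp; base_lr = 0.01 is an assumption,
# since the value does not appear in the hunks shown here.
base_lr = 0.01
start_factor, steps = 0.1, 1000

for it in (0, 250, 500, 750, 1000):
    alpha = min(it / steps, 1.0)
    factor = start_factor * (1.0 - alpha) + alpha
    print(it, base_lr * factor)  # 0.001 at iter 0, rising to 0.01 at iter 1000
```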
backbone: ResNet rpn_head: RPNHead bbox_head: BBoxHead @@ -26,88 +20,69 @@ ResNet: num_stages: 3 RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 1024 - feat_out: 1024 - anchor_per_position: 15 - -Anchor: anchor_generator: - name: AnchorGeneratorRPN - anchor_sizes: [32, 64, 128, 256, 512] aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_target_generator: - name: AnchorTargetGeneratorRPN + anchor_sizes: [32, 64, 128, 256, 512] + strides: [16] + rpn_target_assign: batch_size_per_im: 256 fg_fraction: 0.5 negative_overlap: 0.3 positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator + use_random: True + train_proposal: min_size: 0.0 nms_thresh: 0.7 - train_pre_nms_top_n: 12000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 6000 - infer_post_nms_top_n: 1000 - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 + pre_nms_top_n: 12000 + post_nms_top_n: 2000 + topk_after_collect: True + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 6000 + post_nms_top_n: 1000 + BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: RoIAlign - head_feat: - name: Res5Head - feat_in: 1024 - feat_out: 512 + head: Res5Head + roi_extractor: + resolution: 14 + sampling_ratio: 0 + aligned: True + bbox_assigner: BBoxAssigner with_pool: true - in_feat: 2048 + +BBoxAssigner: + batch_size_per_im: 512 + bg_thresh: [0.5,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + use_random: True + BBoxPostProcess: - decode: - name: RCNNBox - num_classes: 81 - batch_size: 1 + decode: RCNNBox nms: name: MultiClassNMS keep_top_k: 100 score_threshold: 0.05 nms_threshold: 0.5 - -Mask: - mask_target_generator: - name: MaskTargetGenerator - mask_resolution: 14 - -RoIAlign: - resolution: 14 - sampling_ratio: 0 - start_level: 0 - end_level: 0 + normalized: true MaskHead: - mask_feat: - name: MaskFeat - num_convs: 0 - feat_in: 2048 - feat_out: 256 - mask_roi_extractor: RoIAlign - share_bbox_feat: true - feat_in: 256 + head: MaskFeat + roi_extractor: + resolution: 14 + sampling_ratio: 0 + aligned: True + mask_assigner: MaskAssigner + share_bbox_feat: true +MaskFeat: + out_channels: 256 -MaskPostProcess: +MaskAssigner: mask_resolution: 14 + +MaskPostProcess: + binary_thresh: 0.5 diff --git a/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml b/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml index 403a0b43d2f866c5f91434246306683fa42129d7..650184f21a2c25b2a4c5e6f4fb79edfbea6f89ae 100644 --- a/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml +++ b/dygraph/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml @@ -2,13 +2,7 @@ architecture: MaskRCNN pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar load_static_weights: True -# Model Achitecture MaskRCNN: - # model anchor info flow - anchor: Anchor - proposal: Proposal - mask: Mask - # model feat info flow backbone: ResNet neck: FPN rpn_head: RPNHead @@ -27,94 +21,73 @@ ResNet: num_stages: 4 FPN: - in_channels: [256, 512, 1024, 2048] out_channel: 256 - min_level: 0 - max_level: 4 - spatial_scale: [0.25, 0.125, 0.0625, 0.03125] RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 256 - feat_out: 256 - anchor_per_position: 3 - rpn_channel: 256 - -Anchor: anchor_generator: - name: AnchorGeneratorRPN aspect_ratios: [0.5, 1.0, 2.0] - anchor_start_size: 32 - stride: [4., 4.] 
- anchor_target_generator: - name: AnchorTargetGeneratorRPN + anchor_sizes: [[32], [64], [128], [256], [512]] + strides: [4, 8, 16, 32, 64] + rpn_target_assign: batch_size_per_im: 256 fg_fraction: 0.5 negative_overlap: 0.3 positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator + use_random: True + train_proposal: min_size: 0.0 nms_thresh: 0.7 - train_pre_nms_top_n: 2000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 1000 - infer_post_nms_top_n: 1000 - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 + pre_nms_top_n: 2000 + post_nms_top_n: 1000 + topk_after_collect: True + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIAlign - resolution: 7 - sampling_ratio: 2 - head_feat: - name: TwoFCHead - in_dim: 256 - mlp_dim: 1024 - in_feat: 1024 + head: TwoFCHead + roi_extractor: + resolution: 7 + sampling_ratio: 0 + aligned: True + bbox_assigner: BBoxAssigner + +BBoxAssigner: + batch_size_per_im: 512 + bg_thresh: [0.5,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + use_random: True + +TwoFCHead: + mlp_dim: 1024 BBoxPostProcess: - decode: - name: RCNNBox - num_classes: 81 - batch_size: 1 + decode: RCNNBox nms: name: MultiClassNMS keep_top_k: 100 score_threshold: 0.05 nms_threshold: 0.5 - -Mask: - mask_target_generator: - name: MaskTargetGenerator - mask_resolution: 28 + normalized: true MaskHead: - mask_feat: - name: MaskFeat - num_convs: 4 - feat_in: 256 - feat_out: 256 - mask_roi_extractor: - name: RoIAlign - resolution: 14 - sampling_ratio: 2 - share_bbox_feat: False - feat_in: 256 + head: MaskFeat + roi_extractor: + resolution: 14 + sampling_ratio: 0 + aligned: True + mask_assigner: MaskAssigner + share_bbox_feat: False +MaskFeat: + num_convs: 4 + out_channels: 256 -MaskPostProcess: +MaskAssigner: mask_resolution: 28 + +MaskPostProcess: + binary_thresh: 0.5 diff --git a/dygraph/configs/mask_rcnn/_base_/mask_reader.yml b/dygraph/configs/mask_rcnn/_base_/mask_reader.yml index 9ae4ef98ee444d61bf790f8c135ffbe9e4d561c4..7da1f9af9b3e05ab2fad7b9f3a70598f8c0d7be9 100644 --- a/dygraph/configs/mask_rcnn/_base_/mask_reader.yml +++ b/dygraph/configs/mask_rcnn/_base_/mask_reader.yml @@ -2,12 +2,12 @@ worker_num: 2 TrainReader: sample_transforms: - DecodeOp: {} - - RandomFlipImage: {prob: 0.5, is_mask_flip: true} - - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - - Permute: {to_bgr: false, channel_first: true} + - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlipOp: {prob: 0.5, is_mask_flip: true} + - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - PermuteOp: {} batch_transforms: - - PadBatch: {pad_to_stride: -1., use_padded_im_info: false, pad_gt: true} + - PadBatchOp: {pad_to_stride: -1., pad_gt: true} batch_size: 1 shuffle: true drop_last: true @@ -16,8 +16,8 @@ TrainReader: EvalReader: sample_transforms: - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeOp: 
{interp: 1, target_size: [800, 1333], keep_ratio: True} - PermuteOp: {} batch_transforms: - PadBatchOp: {pad_to_stride: -1., pad_gt: false} @@ -30,8 +30,8 @@ EvalReader: TestReader: sample_transforms: - DecodeOp: {} + - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - - ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True} - PermuteOp: {} batch_transforms: - PadBatchOp: {pad_to_stride: -1., pad_gt: false} diff --git a/dygraph/configs/mask_rcnn/_base_/optimizer_1x.yml b/dygraph/configs/mask_rcnn/_base_/optimizer_1x.yml index d28b0947b9fb6567a70f11acfe6663dac89b0771..63f898e9c52556bfa0fbbe9c369900c09ab3f94c 100644 --- a/dygraph/configs/mask_rcnn/_base_/optimizer_1x.yml +++ b/dygraph/configs/mask_rcnn/_base_/optimizer_1x.yml @@ -7,8 +7,8 @@ LearningRate: gamma: 0.1 milestones: [8, 11] - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 + start_factor: 0.001 + steps: 1000 OptimizerBuilder: optimizer: diff --git a/dygraph/deploy/cpp/include/config_parser.h b/dygraph/deploy/cpp/include/config_parser.h index 05c5941a8ee7cadc95aac3c8e936d5476997751f..c38049d3140a66aeac42059ed0acb835b5ef6743 100644 --- a/dygraph/deploy/cpp/include/config_parser.h +++ b/dygraph/deploy/cpp/include/config_parser.h @@ -76,13 +76,6 @@ class ConfigPaser { std::cerr << "Please set draw_threshold." << std::endl; return false; } - // Get with_background - if (config["with_background"].IsDefined()) { - with_background_ = config["with_background"].as(); - } else { - std::cerr << "Please set with_background." << std::endl; - return false; - } // Get Preprocess for preprocessing if (config["Preprocess"].IsDefined()) { preprocess_info_ = config["Preprocess"]; @@ -111,7 +104,6 @@ class ConfigPaser { float draw_threshold_; std::string arch_; int min_subgraph_size_; - bool with_background_; YAML::Node preprocess_info_; std::vector label_list_; std::vector image_shape_; diff --git a/dygraph/ppdet/core/workspace.py b/dygraph/ppdet/core/workspace.py index caba6997699d0ba29052f72f6939dacec060a5f5..5d6a5d9f71581e51774aabac4e90504b6099db21 100644 --- a/dygraph/ppdet/core/workspace.py +++ b/dygraph/ppdet/core/workspace.py @@ -99,19 +99,6 @@ def _load_config_with_base(file_path): return file_cfg -WITHOUT_BACKGROUND_ARCHS = ['YOLOv3', 'FCOS', 'TTFNet'] - - -def _parse_with_background(): - arch = global_config.architecture - with_background = arch not in WITHOUT_BACKGROUND_ARCHS - global_config['with_background'] = with_background - global_config['TrainReader']['with_background'] = with_background - global_config['EvalReader']['with_background'] = with_background - global_config['TestReader']['with_background'] = with_background - global_config['num_classes'] += with_background - - def load_config(file_path): """ Load config from file. 
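With `_parse_with_background` removed above, class ids become zero-based everywhere: there is no reserved background slot at class 0, and COCO's `num_classes` drops from 81 to 80 (the reader defaults below reflect this). A minimal sketch of the id mapping before and after, using a few hypothetical COCO category ids:

```python
# Hypothetical category ids, as pycocotools' getCatIds() might return them.
cat_ids = [1, 2, 3, 16, 17]

# Old convention (with_background=True): class 0 reserved for background.
old_catid2clsid = {catid: i + 1 for i, catid in enumerate(cat_ids)}

# New convention: zero-based class ids, no background class.
new_catid2clsid = {catid: i for i, catid in enumerate(cat_ids)}

assert old_catid2clsid[1] == 1
assert new_catid2clsid[1] == 0
```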
@@ -129,9 +116,6 @@ def load_config(file_path): cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0] merge_config(cfg) - # parse config from merged config - _parse_with_background() - return global_config @@ -166,7 +150,7 @@ def merge_config(config, another_cfg=None): Returns: global config """ global global_config - dct = another_cfg if another_cfg is not None else global_config + dct = another_cfg or global_config return dict_merge(dct, config) @@ -231,16 +215,13 @@ def create(cls_or_name, **kwargs): isinstance(global_config[name], SchemaDict), \ "the module {} is not registered".format(name) config = global_config[name] - config.update(kwargs) - config.validate() cls = getattr(config.pymodule, name) - kwargs = {} - kwargs.update(global_config[name]) + cls_kwargs = {} + cls_kwargs.update(global_config[name]) # parse `shared` annoation of registered modules if getattr(config, 'shared', None): for k in config.shared: - target_key = config[k] shared_conf = config.schema[k].default assert isinstance(shared_conf, SharedConfig) @@ -249,11 +230,14 @@ def create(cls_or_name, **kwargs): continue # value is given for the module elif shared_conf.key in global_config: # `key` is present in config - kwargs[k] = global_config[shared_conf.key] + cls_kwargs[k] = global_config[shared_conf.key] else: - kwargs[k] = shared_conf.default_value + cls_kwargs[k] = shared_conf.default_value # parse `inject` annoation of registered modules + if getattr(cls, 'from_config', None): + cls_kwargs.update(cls.from_config(config, **kwargs)) + if getattr(config, 'inject', None): for k in config.inject: target_key = config[k] @@ -275,18 +259,18 @@ def create(cls_or_name, **kwargs): continue target[i] = v if isinstance(target, SchemaDict): - kwargs[k] = create(inject_name) + cls_kwargs[k] = create(inject_name) elif isinstance(target_key, str): if target_key not in global_config: raise ValueError("Missing injection config:", target_key) target = global_config[target_key] if isinstance(target, SchemaDict): - kwargs[k] = create(target_key) + cls_kwargs[k] = create(target_key) elif hasattr(target, '__dict__'): # serialized object - kwargs[k] = target + cls_kwargs[k] = target else: raise ValueError("Unsupported injection type:", target_key) # prevent modification of global config values of reference types # (e.g., list, dict) from within the created module instances #kwargs = copy.deepcopy(kwargs) - return cls(**kwargs) + return cls(**cls_kwargs) diff --git a/dygraph/ppdet/data/reader.py b/dygraph/ppdet/data/reader.py index def98b962c7c77ea3a4df1a2f604e4f1a8a6292f..ad3e96d41aa454e53902604e7272a209cc551ffd 100644 --- a/dygraph/ppdet/data/reader.py +++ b/dygraph/ppdet/data/reader.py @@ -37,7 +37,7 @@ MAIN_PID = os.getpid() class Compose(object): - def __init__(self, transforms, num_classes=81): + def __init__(self, transforms, num_classes=80): self.transforms = transforms self.transforms_cls = [] for t in self.transforms: @@ -61,7 +61,7 @@ class Compose(object): class BatchCompose(Compose): - def __init__(self, transforms, num_classes=81): + def __init__(self, transforms, num_classes=80): super(BatchCompose, self).__init__(transforms, num_classes) self.output_fields = mp.Manager().list([]) self.lock = mp.Lock() @@ -119,8 +119,7 @@ class BaseDataLoader(object): shuffle=False, drop_last=False, drop_empty=True, - num_classes=81, - with_background=True, + num_classes=80, **kwargs): # sample transform self._sample_transforms = Compose( @@ -132,7 +131,6 @@ class BaseDataLoader(object): self.batch_size = batch_size 
self.shuffle = shuffle self.drop_last = drop_last - self.with_background = with_background self.kwargs = kwargs def __call__(self, @@ -142,7 +140,7 @@ class BaseDataLoader(object): return_list=False, use_prefetch=True): self.dataset = dataset - self.dataset.parse_dataset(self.with_background) + self.dataset.parse_dataset() # get data self.dataset.set_transform(self._sample_transforms) # set kwargs @@ -204,13 +202,11 @@ class TrainReader(BaseDataLoader): shuffle=True, drop_last=True, drop_empty=True, - num_classes=81, - with_background=True, + num_classes=80, **kwargs): - super(TrainReader, self).__init__(inputs_def, sample_transforms, - batch_transforms, batch_size, shuffle, - drop_last, drop_empty, num_classes, - with_background, **kwargs) + super(TrainReader, self).__init__( + inputs_def, sample_transforms, batch_transforms, batch_size, + shuffle, drop_last, drop_empty, num_classes, **kwargs) @register @@ -223,13 +219,11 @@ class EvalReader(BaseDataLoader): shuffle=False, drop_last=True, drop_empty=True, - num_classes=81, - with_background=True, + num_classes=80, **kwargs): - super(EvalReader, self).__init__(inputs_def, sample_transforms, - batch_transforms, batch_size, shuffle, - drop_last, drop_empty, num_classes, - with_background, **kwargs) + super(EvalReader, self).__init__( + inputs_def, sample_transforms, batch_transforms, batch_size, + shuffle, drop_last, drop_empty, num_classes, **kwargs) @register @@ -242,10 +236,8 @@ class TestReader(BaseDataLoader): shuffle=False, drop_last=False, drop_empty=True, - num_classes=81, - with_background=True, + num_classes=80, **kwargs): - super(TestReader, self).__init__(inputs_def, sample_transforms, - batch_transforms, batch_size, shuffle, - drop_last, drop_empty, num_classes, - with_background, **kwargs) + super(TestReader, self).__init__( + inputs_def, sample_transforms, batch_transforms, batch_size, + shuffle, drop_last, drop_empty, num_classes, **kwargs) diff --git a/dygraph/ppdet/data/source/coco.py b/dygraph/ppdet/data/source/coco.py index aacaa771ca789b26fd072f2fc67923ec35fefb8f..387229136ef5470d988e19e63c912992c3e1a801 100644 --- a/dygraph/ppdet/data/source/coco.py +++ b/dygraph/ppdet/data/source/coco.py @@ -35,7 +35,7 @@ class COCODataSet(DetDataset): self.load_image_only = False self.load_semantic = False - def parse_dataset(self, with_background=True): + def parse_dataset(self): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -44,16 +44,12 @@ class COCODataSet(DetDataset): from pycocotools.coco import COCO coco = COCO(anno_path) img_ids = coco.getImgIds() + img_ids.sort() cat_ids = coco.getCatIds() records = [] ct = 0 - # when with_background = True, mapping category to classid, like: - # background:0, first_class:1, second_class:2, ... 
- catid2clsid = dict({ - catid: i + int(with_background) - for i, catid in enumerate(cat_ids) - }) + catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)}) cname2cid = dict({ coco.loadCats(catid)[0]['name']: clsid for catid, clsid in catid2clsid.items() @@ -95,13 +91,14 @@ class COCODataSet(DetDataset): else: if not any(np.array(inst['bbox'])): continue - x, y, box_w, box_h = inst['bbox'] - x1 = max(0, x) - y1 = max(0, y) - x2 = min(im_w - 1, x1 + max(0, box_w - 1)) - y2 = min(im_h - 1, y1 + max(0, box_h - 1)) - if inst['area'] > 0 and x2 >= x1 and y2 >= y1: - inst['clean_bbox'] = [x1, y1, x2, y2] + x1, y1, box_w, box_h = inst['bbox'] + x2 = x1 + box_w + y2 = y1 + box_h + eps = 1e-5 + if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps: + inst['clean_bbox'] = [ + round(float(x), 3) for x in [x1, y1, x2, y2] + ] bboxes.append(inst) else: logger.warning( diff --git a/dygraph/ppdet/data/source/dataset.py b/dygraph/ppdet/data/source/dataset.py index 69e38ef6d1b2c210d8ea054a8b5de63127c77a25..66c32b881406b610846c2fa7de0b62def904fc76 100644 --- a/dygraph/ppdet/data/source/dataset.py +++ b/dygraph/ppdet/data/source/dataset.py @@ -78,7 +78,7 @@ class DetDataset(Dataset): def set_epoch(self, epoch_id): self._epoch = epoch_id - def parse_dataset(self, with_background=True): + def parse_dataset(self, ): raise NotImplemented( "Need to implement parse_dataset method of Dataset") @@ -115,13 +115,17 @@ class ImageFolder(DetDataset): sample_num=-1, use_default_label=None, **kwargs): - super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path, - sample_num, use_default_label) + super(ImageFolder, self).__init__( + dataset_dir, + image_dir, + anno_path, + sample_num=sample_num, + use_default_label=use_default_label) self._imid2path = {} self.roidbs = None self.sample_num = sample_num - def parse_dataset(self, with_background=True): + def parse_dataset(self, ): if not self.roidbs: self.roidbs = self._load_images() diff --git a/dygraph/ppdet/data/source/voc.py b/dygraph/ppdet/data/source/voc.py index 6ef016f3ef774fd7bb890bfe7c15978b571b601f..00d976ce0c944620b0d249d434b6fc023a4e4fb7 100644 --- a/dygraph/ppdet/data/source/voc.py +++ b/dygraph/ppdet/data/source/voc.py @@ -58,15 +58,12 @@ class VOCDataSet(DetDataset): sample_num=sample_num) self.label_list = label_list - def parse_dataset(self, with_background=True): + def parse_dataset(self, ): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) # mapping category name to class id - # if with_background is True: - # background:0, first_class:1, second_class:2, ... - # if with_background is False: - # first_class:0, second_class:1, ... + # first_class:0, second_class:1, ... 
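The same zero-based convention now applies to user-supplied `label_list` files: the first name in the file maps to class 0 rather than 1. A small sketch with hypothetical class names:

```python
# Lines as read from a hypothetical label_list file.
lines = ["aeroplane\n", "bicycle\n", "bird\n"]

cname2cid = {}
label_id = 0  # was int(with_background), i.e. 1 by default, before this change
for line in lines:
    cname2cid[line.strip()] = label_id
    label_id += 1

assert cname2cid == {"aeroplane": 0, "bicycle": 1, "bird": 2}
```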
records = [] ct = 0 cname2cid = {} @@ -76,12 +73,12 @@ class VOCDataSet(DetDataset): raise ValueError("label_list {} does not exists".format( label_path)) with open(label_path, 'r') as fr: - label_id = int(with_background) + label_id = 0 for line in fr.readlines(): cname2cid[line.strip()] = label_id label_id += 1 else: - cname2cid = pascalvoc_label(with_background) + cname2cid = pascalvoc_label() with open(anno_path, 'r') as fr: while True: @@ -175,29 +172,27 @@ class VOCDataSet(DetDataset): return os.path.join(self.dataset_dir, self.label_list) -def pascalvoc_label(with_background=True): +def pascalvoc_label(): labels_map = { - 'aeroplane': 1, - 'bicycle': 2, - 'bird': 3, - 'boat': 4, - 'bottle': 5, - 'bus': 6, - 'car': 7, - 'cat': 8, - 'chair': 9, - 'cow': 10, - 'diningtable': 11, - 'dog': 12, - 'horse': 13, - 'motorbike': 14, - 'person': 15, - 'pottedplant': 16, - 'sheep': 17, - 'sofa': 18, - 'train': 19, - 'tvmonitor': 20 + 'aeroplane': 0, + 'bicycle': 1, + 'bird': 2, + 'boat': 3, + 'bottle': 4, + 'bus': 5, + 'car': 6, + 'cat': 7, + 'chair': 8, + 'cow': 9, + 'diningtable': 10, + 'dog': 11, + 'horse': 12, + 'motorbike': 13, + 'person': 14, + 'pottedplant': 15, + 'sheep': 16, + 'sofa': 17, + 'train': 18, + 'tvmonitor': 19 } - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} return labels_map diff --git a/dygraph/ppdet/data/source/widerface.py b/dygraph/ppdet/data/source/widerface.py index 3a2b5224ec1f23b4d4ffd480cde29a62116fd327..d782cd4982efc2128f69ec20de136ec0d06f0faa 100644 --- a/dygraph/ppdet/data/source/widerface.py +++ b/dygraph/ppdet/data/source/widerface.py @@ -52,7 +52,7 @@ class WIDERFaceDataSet(DataSet): self.cname2cid = None self.with_lmk = with_lmk - def load_roidb_and_cname2cid(self, with_background=True): + def load_roidb_and_cname2cid(self, ): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -61,7 +61,7 @@ class WIDERFaceDataSet(DataSet): records = [] ct = 0 file_lists = self._load_file_list(txt_file) - cname2cid = widerface_label(with_background) + cname2cid = widerface_label() for item in file_lists: im_fname = item[0] @@ -159,8 +159,6 @@ class WIDERFaceDataSet(DataSet): return list(file_dict.values()) -def widerface_label(with_background=True): - labels_map = {'face': 1} - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} +def widerface_label(): + labels_map = {'face': 0} return labels_map diff --git a/dygraph/ppdet/data/transform/operator.py b/dygraph/ppdet/data/transform/operator.py index 37c28524ebf003503863ca09b986143bce514a53..90b69dccb4e3b6d9352038175b6dc7e291e837aa 100644 --- a/dygraph/ppdet/data/transform/operator.py +++ b/dygraph/ppdet/data/transform/operator.py @@ -500,7 +500,7 @@ class RandomFlipOp(BaseOperator): def apply_segm(self, segms, height, width): def _flip_poly(poly, width): flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 + flipped_poly[0::2] = width - np.array(poly[0::2]) return flipped_poly.tolist() def _flip_rle(rle, height, width): @@ -526,7 +526,7 @@ class RandomFlipOp(BaseOperator): for i in range(gt_keypoint.shape[1]): if i % 2 == 0: old_x = gt_keypoint[:, i].copy() - gt_keypoint[:, i] = width - old_x - 1 + gt_keypoint[:, i] = width - old_x return gt_keypoint def apply_image(self, image): @@ -535,8 +535,8 @@ class RandomFlipOp(BaseOperator): def apply_bbox(self, bbox, width): oldx1 = bbox[:, 0].copy() oldx2 = bbox[:, 2].copy() - bbox[:, 0] = width - oldx2 - 1 - bbox[:, 2] 
= width - oldx1 - 1 + bbox[:, 0] = width - oldx2 + bbox[:, 2] = width - oldx1 return bbox def apply(self, sample, context=None): @@ -601,6 +601,7 @@ class ResizeOp(BaseOperator): def apply_image(self, image, scale): im_scale_x, im_scale_y = scale + return cv2.resize( image, None, @@ -614,8 +615,8 @@ class ResizeOp(BaseOperator): resize_w, resize_h = size bbox[:, 0::2] *= im_scale_x bbox[:, 1::2] *= im_scale_y - bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w - 1) - bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h - 1) + bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w) + bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h) return bbox def apply_segm(self, segms, im_size, scale): diff --git a/dygraph/ppdet/engine/export_utils.py b/dygraph/ppdet/engine/export_utils.py index 5d5779273af66642a386a94f4572e0bebdf4a72b..85cd8d003e3130c5886c57e1050924fd5acfcbb9 100644 --- a/dygraph/ppdet/engine/export_utils.py +++ b/dygraph/ppdet/engine/export_utils.py @@ -43,9 +43,8 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape): preprocess_list = [] anno_file = dataset_cfg.get_anno() - with_background = reader_cfg['with_background'] - clsid2catid, catid2name = get_categories(metric, anno_file, with_background) + clsid2catid, catid2name = get_categories(metric, anno_file) label_list = [str(cat) for cat in catid2name.values()] @@ -73,7 +72,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape): }) break - return with_background, preprocess_list, label_list, image_shape + return preprocess_list, label_list, image_shape def _dump_infer_config(config, path, image_shape, model): @@ -102,7 +101,7 @@ def _dump_infer_config(config, path, image_shape, model): if 'mask_post_process' in model.__dict__ and model.__dict__[ 'mask_post_process']: infer_cfg['mask_resolution'] = model.mask_post_process.mask_resolution - infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[ + infer_cfg['Preprocess'], infer_cfg[ 'label_list'], image_shape = _parse_reader( config['TestReader'], config['TestDataset'], config['metric'], infer_cfg['arch'], image_shape) diff --git a/dygraph/ppdet/engine/trainer.py b/dygraph/ppdet/engine/trainer.py index 8eb5d0e395cb69c0bd135c0e83b5d763a76e2f77..e51aa8b9e1ba0b8515cae870821b672314739df4 100644 --- a/dygraph/ppdet/engine/trainer.py +++ b/dygraph/ppdet/engine/trainer.py @@ -97,19 +97,11 @@ class Trainer(object): def _init_metrics(self): if self.mode == 'eval': if self.cfg.metric == 'COCO': - mask_resolution = self.model.mask_post_process.mask_resolution if getattr( - self.model, 'mask_post_process', None) else None - self._metrics = [ - COCOMetric( - anno_file=self.dataset.get_anno(), - with_background=self.cfg.with_background, - mask_resolution=mask_resolution) - ] + self._metrics = [COCOMetric(anno_file=self.dataset.get_anno())] elif self.cfg.metric == 'VOC': self._metrics = [ VOCMetric( anno_file=self.dataset.get_anno(), - with_background=self.cfg.with_background, class_num=self.cfg.num_classes, map_type=self.cfg.map_type) ] @@ -240,9 +232,7 @@ class Trainer(object): imid2path = self.dataset.get_imid2path() anno_file = self.dataset.get_anno() - with_background = self.cfg.with_background - clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file, - with_background) + clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file) # Run Infer for step_id, data in enumerate(loader): @@ -255,14 +245,6 @@ class Trainer(object): for key, value in outs.items(): outs[key] = value.numpy() - # FIXME: for more elegent coding - if 'mask' 
in outs and 'bbox' in outs: - mask_resolution = self.model.mask_post_process.mask_resolution - from ppdet.py_op.post_process import mask_post_process - outs['mask'] = mask_post_process(outs, outs['im_shape'], - outs['scale_factor'], - mask_resolution) - batch_res = get_infer_results(outs, clsid2catid) bbox_num = outs['bbox_num'] start = 0 diff --git a/dygraph/ppdet/metrics/category.py b/dygraph/ppdet/metrics/category.py index a957d850e49e9a054ce36a4094c12222cc6b3693..6b19c06a625932e61283afbe90d227fd53e41b03 100644 --- a/dygraph/ppdet/metrics/category.py +++ b/dygraph/ppdet/metrics/category.py @@ -25,15 +25,13 @@ logger = setup_logger(__name__) __all__ = ['get_categories'] -def get_categories(metric_type, anno_file=None, with_background=True): +def get_categories(metric_type, anno_file=None): """ Get class id to category id map and category id to category name map from annotation file. Args: anno_file (str): annotation file path - with_background (bool, default True): - whether load background as class 0. """ if metric_type.lower() == 'coco': if anno_file and os.path.isfile(anno_file): @@ -43,21 +41,14 @@ def get_categories(metric_type, anno_file=None, with_background=True): coco = COCO(anno_file) cats = coco.loadCats(coco.getCatIds()) - clsid2catid = { - i + int(with_background): cat['id'] - for i, cat in enumerate(cats) - } + clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)} catid2name = {cat['id']: cat['name'] for cat in cats} - if with_background: - clsid2catid.update({0: 0}) - catid2name.update({0: 'background'}) - return clsid2catid, catid2name # anno file not exist, load default categories of COCO17 else: - return _coco17_category(with_background) + return _coco17_category() elif metric_type.lower() == 'voc': if anno_file and os.path.isfile(anno_file): @@ -66,9 +57,7 @@ def get_categories(metric_type, anno_file=None, with_background=True): for line in f.readlines(): cats.append(line.strip()) - if cats[0] != 'background' and with_background: - cats.insert(0, 'background') - if cats[0] == 'background' and not with_background: + if cats[0] == 'background': cats = cats[1:] clsid2catid = {i: i for i in range(len(cats))} @@ -79,25 +68,22 @@ def get_categories(metric_type, anno_file=None, with_background=True): # anno file not exist, load default categories of # VOC all 20 categories else: - return _vocall_category(with_background) + return _vocall_category() elif metric_type.lower() == 'oid': if anno_file and os.path.isfile(anno_file): logger.warn("only default categories support for OID19") - return _oid19_category(with_background) + return _oid19_category() else: raise ValueError("unknown metric type {}".format(metric_type)) -def _coco17_category(with_background=True): +def _coco17_category(): """ Get class id to category id map and category id to category name map of COCO2017 dataset - Args: - with_background (bool, default True): - whether load background as class 0. """ clsid2catid = { 1: 1, @@ -266,39 +252,30 @@ def _coco17_category(with_background=True): 90: 'toothbrush' } - if not with_background: - clsid2catid = {k - 1: v for k, v in clsid2catid.items()} - catid2name.pop(0) - else: - clsid2catid.update({0: 0}) + clsid2catid = {k - 1: v for k, v in clsid2catid.items()} + catid2name.pop(0) return clsid2catid, catid2name -def _vocall_category(with_background=True): +def _vocall_category(): """ Get class id to category id map and category id to category name map of mixup voc dataset - Args: - with_background (bool, default True): - whether load background as class 0. 
""" - label_map = pascalvoc_label(with_background) + label_map = pascalvoc_label() label_map = sorted(label_map.items(), key=lambda x: x[1]) cats = [l[0] for l in label_map] - if with_background: - cats.insert(0, 'background') - clsid2catid = {i: i for i in range(len(cats))} catid2name = {i: name for i, name in enumerate(cats)} return clsid2catid, catid2name -def _oid19_category(with_background=True): - clsid2catid = {k: k for k in range(1, 501)} +def _oid19_category(): + clsid2catid = {k: k + 1 for k in range(500)} catid2name = { 0: "background", @@ -804,6 +781,4 @@ def _oid19_category(with_background=True): 500: "Toilet", } - if not with_background: - clsid2catid = {k - 1: v for k, v in clsid2catid.items()} return clsid2catid, catid2name diff --git a/dygraph/ppdet/metrics/coco_utils.py b/dygraph/ppdet/metrics/coco_utils.py index c25641d756253b7aa74154602615c77715936ee7..5ace10df0f0e1cca3e8da20c43bb49facfe7941f 100644 --- a/dygraph/ppdet/metrics/coco_utils.py +++ b/dygraph/ppdet/metrics/coco_utils.py @@ -38,17 +38,17 @@ def get_infer_results(outs, catid): ) im_id = outs['im_id'] - im_shape = outs['im_shape'] - scale_factor = outs['scale_factor'] infer_res = {} if 'bbox' in outs: - infer_res['bbox'] = get_det_res(outs['bbox'], outs['bbox_num'], im_id, + infer_res['bbox'] = get_det_res(outs['bbox'], outs['score'], + outs['label'], outs['bbox_num'], im_id, catid) if 'mask' in outs: # mask post process - infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox_num'], im_id, + infer_res['mask'] = get_seg_res(outs['mask'], outs['score'], + outs['label'], outs['bbox_num'], im_id, catid) if 'segm' in outs: diff --git a/dygraph/ppdet/metrics/metrics.py b/dygraph/ppdet/metrics/metrics.py index fde7e19b860390eddc0cc1650e6044e51f812335..644658cf48c8054578f88db49fc581d93a7ee9d9 100644 --- a/dygraph/ppdet/metrics/metrics.py +++ b/dygraph/ppdet/metrics/metrics.py @@ -49,14 +49,11 @@ class Metric(paddle.metric.Metric): class COCOMetric(Metric): - def __init__(self, anno_file, with_background=True, mask_resolution=None): + def __init__(self, anno_file): assert os.path.isfile(anno_file), \ "anno_file {} not a file".format(anno_file) self.anno_file = anno_file - self.with_background = with_background - self.mask_resolution = mask_resolution - self.clsid2catid, self.catid2name = get_categories('COCO', anno_file, - with_background) + self.clsid2catid, self.catid2name = get_categories('COCO', anno_file) self.reset() @@ -71,16 +68,9 @@ class COCOMetric(Metric): for k, v in outputs.items(): outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v - # some input fields also needed - for k in ['im_id', 'scale_factor', 'im_shape']: - v = inputs[k] - outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v - - if 'mask' in outs and 'bbox' in outs: - from ppdet.py_op.post_process import mask_post_process - outs['mask'] = mask_post_process(outs, outs['im_shape'], - outs['scale_factor'], - self.mask_resolution) + im_id = inputs['im_id'] + outs['im_id'] = im_id.numpy() if isinstance(im_id, + paddle.Tensor) else im_id infer_results = get_infer_results(outs, self.clsid2catid) self.results['bbox'] += infer_results[ @@ -131,7 +121,6 @@ class COCOMetric(Metric): class VOCMetric(Metric): def __init__(self, anno_file, - with_background=True, class_num=20, overlap_thresh=0.5, map_type='11point', @@ -140,9 +129,7 @@ class VOCMetric(Metric): assert os.path.isfile(anno_file), \ "anno_file {} not a file".format(anno_file) self.anno_file = anno_file - self.with_background = with_background - self.clsid2catid, self.catid2name = 
get_categories('VOC', anno_file,
-                                                           with_background)
+        self.clsid2catid, self.catid2name = get_categories('VOC', anno_file)
 
         self.overlap_thresh = overlap_thresh
         self.map_type = map_type
diff --git a/dygraph/ppdet/modeling/__init__.py b/dygraph/ppdet/modeling/__init__.py
index af37acfda4b315866bd40675fa35cc85e59fd0fc..5171d205cf3992f70c3187eea595504215560ef2 100644
--- a/dygraph/ppdet/modeling/__init__.py
+++ b/dygraph/ppdet/modeling/__init__.py
@@ -1,8 +1,7 @@
 from . import ops
-from . import bbox
-from . import mask
 from . import backbones
 from . import necks
+from . import proposal_generator
 from . import heads
 from . import losses
 from . import architectures
@@ -11,10 +10,9 @@ from . import layers
 from . import utils
 
 from .ops import *
-from .bbox import *
-from .mask import *
 from .backbones import *
 from .necks import *
+from .proposal_generator import *
 from .heads import *
 from .losses import *
 from .architectures import *
diff --git a/dygraph/ppdet/modeling/architectures/faster_rcnn.py b/dygraph/ppdet/modeling/architectures/faster_rcnn.py
index ffdd17c9387998e7dd3cc1b91e2c67c2ecdb23d7..e9a4af87b0f5d0e9bccaa85050444bbeb770f1df 100644
--- a/dygraph/ppdet/modeling/architectures/faster_rcnn.py
+++ b/dygraph/ppdet/modeling/architectures/faster_rcnn.py
@@ -1,9 +1,23 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 import paddle
-from ppdet.core.workspace import register
+from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 
 __all__ = ['FasterRCNN']
@@ -12,91 +26,86 @@ __all__ = ['FasterRCNN']
 
 @register
 class FasterRCNN(BaseArch):
     __category__ = 'architecture'
-    __inject__ = [
-        'anchor', 'proposal', 'backbone', 'neck', 'rpn_head', 'bbox_head',
-        'bbox_post_process'
-    ]
+    __inject__ = ['bbox_post_process']
 
     def __init__(self,
-                 anchor,
-                 proposal,
                  backbone,
                  rpn_head,
                  bbox_head,
                  bbox_post_process,
                  neck=None):
+        """
+        backbone (nn.Layer): backbone instance.
+        rpn_head (nn.Layer): generates proposals using backbone features.
+        bbox_head (nn.Layer): a head that performs per-region computation.
+        bbox_post_process (object): decodes and applies NMS to head output.
+        neck (nn.Layer): feature enhancement module, e.g. FPN. Default None.
+ """ + super(FasterRCNN, self).__init__() - self.anchor = anchor - self.proposal = proposal self.backbone = backbone + self.neck = neck self.rpn_head = rpn_head self.bbox_head = bbox_head self.bbox_post_process = bbox_post_process - self.neck = neck - def model_arch(self): - # Backbone - body_feats = self.backbone(self.inputs) - spatial_scale = 0.0625 + @classmethod + def from_config(cls, cfg, *args, **kwargs): + backbone = create(cfg['backbone']) + kwargs = {'input_shape': backbone.out_shape} + neck = cfg['neck'] and create(cfg['neck'], **kwargs) + + out_shape = neck and neck.out_shape or backbone.out_shape + kwargs = {'input_shape': out_shape} + rpn_head = create(cfg['rpn_head'], **kwargs) + bbox_head = create(cfg['bbox_head'], **kwargs) + return { + 'backbone': backbone, + 'neck': neck, + "rpn_head": rpn_head, + "bbox_head": bbox_head, + } - # Neck + def _forward(self): + body_feats = self.backbone(self.inputs) if self.neck is not None: - body_feats, spatial_scale = self.neck(body_feats) - - # RPN - # rpn_head returns two list: rpn_feat, rpn_head_out - # each element in rpn_feats contains rpn feature on each level, - # and the length is 1 when the neck is not applied. - # each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta) - rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats) - - # Anchor - # anchor_out returns a list, - # each element contains (anchor, anchor_var) - self.anchor_out = self.anchor(rpn_feat) + body_feats = self.neck(body_feats) + if self.training: + rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs) + bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num, + self.inputs) + return rpn_loss, bbox_loss + else: + rois, rois_num, _ = self.rpn_head(body_feats, self.inputs) + preds, _ = self.bbox_head(body_feats, rois, rois_num, None) - # Proposal RoI - # compute targets here when training - rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out, - self.training) - # BBox Head - bbox_feat, self.bbox_head_out, self.bbox_head_feat_func = self.bbox_head( - body_feats, rois, spatial_scale) + im_shape = self.inputs['im_shape'] + scale_factor = self.inputs['scale_factor'] + bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num), + im_shape, scale_factor) - if not self.training: - bbox_pred, bboxes = self.bbox_head.get_prediction( - self.bbox_head_out, rois) - # Refine bbox by the output from bbox_head at test stage - self.bboxes = self.bbox_post_process(bbox_pred, bboxes, - self.inputs['im_shape'], - self.inputs['scale_factor']) - - else: - # Proposal RoI for Mask branch - # bboxes update at training stage only - bbox_targets = self.proposal.get_targets()[0] + # rescale the prediction back to origin image + bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num, + im_shape, scale_factor) + return bbox_pred, bbox_num def get_loss(self, ): + rpn_loss, bbox_loss = self._forward() loss = {} - - # RPN loss - rpn_loss_inputs = self.anchor.generate_loss_inputs( - self.inputs, self.rpn_head_out, self.anchor_out) - loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs) - loss.update(loss_rpn) - - # BBox loss - bbox_targets = self.proposal.get_targets() - loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets) - loss.update(loss_bbox) + loss.update(rpn_loss) + loss.update(bbox_loss) total_loss = paddle.add_n(list(loss.values())) loss.update({'loss': total_loss}) return loss def get_pred(self): - bbox, bbox_num = self.bboxes + bbox_pred, bbox_num = self._forward() + label = bbox_pred[:, 0] + score = 
bbox_pred[:, 1] + bbox = bbox_pred[:, 2:] output = { 'bbox': bbox, - 'bbox_num': bbox_num, + 'score': score, + 'label': label, + 'bbox_num': bbox_num } return output diff --git a/dygraph/ppdet/modeling/architectures/mask_rcnn.py b/dygraph/ppdet/modeling/architectures/mask_rcnn.py index 7c626eaf027a493f4187b419b84b7c2a21aebdf7..05b05c137bce52458428fb995417ad2869a602dd 100644 --- a/dygraph/ppdet/modeling/architectures/mask_rcnn.py +++ b/dygraph/ppdet/modeling/architectures/mask_rcnn.py @@ -17,7 +17,7 @@ from __future__ import division from __future__ import print_function import paddle -from ppdet.core.workspace import register +from ppdet.core.workspace import register, create from .meta_arch import BaseArch __all__ = ['MaskRCNN'] @@ -27,22 +27,11 @@ __all__ = ['MaskRCNN'] class MaskRCNN(BaseArch): __category__ = 'architecture' __inject__ = [ - 'anchor', - 'proposal', - 'mask', - 'backbone', - 'neck', - 'rpn_head', - 'bbox_head', - 'mask_head', 'bbox_post_process', 'mask_post_process', ] def __init__(self, - anchor, - proposal, - mask, backbone, rpn_head, bbox_head, @@ -50,95 +39,99 @@ class MaskRCNN(BaseArch): bbox_post_process, mask_post_process, neck=None): + """ + backbone (nn.Layer): backbone instance. + rpn_head (nn.Layer): generates proposals using backbone features. + bbox_head (nn.Layer): a head that performs per-region computation. + mask_head (nn.Layer): generates mask from bbox and backbone features. + """ super(MaskRCNN, self).__init__() - self.anchor = anchor - self.proposal = proposal - self.mask = mask self.backbone = backbone self.neck = neck self.rpn_head = rpn_head self.bbox_head = bbox_head self.mask_head = mask_head + self.bbox_post_process = bbox_post_process self.mask_post_process = mask_post_process - def model_arch(self): - # Backbone - body_feats = self.backbone(self.inputs) - spatial_scale = 1. / 16 + @classmethod + def from_config(cls, cfg, *args, **kwargs): + backbone = create(cfg['backbone']) + kwargs = {'input_shape': backbone.out_shape} + neck = cfg['neck'] and create(cfg['neck'], **kwargs) + + out_shape = neck and neck.out_shape or backbone.out_shape + kwargs = {'input_shape': out_shape} + rpn_head = create(cfg['rpn_head'], **kwargs) + bbox_head = create(cfg['bbox_head'], **kwargs) + + out_shape = neck and out_shape or bbox_head.get_head().out_shape + kwargs = {'input_shape': out_shape} + mask_head = create(cfg['mask_head'], **kwargs) + return { + 'backbone': backbone, + 'neck': neck, + "rpn_head": rpn_head, + "bbox_head": bbox_head, + "mask_head": mask_head, + } - # Neck + def _forward(self): + body_feats = self.backbone(self.inputs) if self.neck is not None: - body_feats, spatial_scale = self.neck(body_feats) - - # RPN - # rpn_head returns two list: rpn_feat, rpn_head_out - # each element in rpn_feats contains rpn feature on each level, - # and the length is 1 when the neck is not applied. 
- # each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta) - rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats) - - # Anchor - # anchor_out returns a list, - # each element contains (anchor, anchor_var) - self.anchor_out = self.anchor(rpn_feat) - - # Proposal RoI - # compute targets here when training - rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out, - self.training) - # BBox Head - bbox_feat, self.bbox_head_out, bbox_head_feat_func = self.bbox_head( - body_feats, rois, spatial_scale) - - rois_has_mask_int32 = None - if not self.training: - bbox_pred, bboxes = self.bbox_head.get_prediction( - self.bbox_head_out, rois) - # Refine bbox by the output from bbox_head at test stage - self.bboxes = self.bbox_post_process(bbox_pred, bboxes, - self.inputs['im_shape'], - self.inputs['scale_factor']) + body_feats = self.neck(body_feats) + + if self.training: + rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs) + bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num, + self.inputs) + rois, rois_num = self.bbox_head.get_assigned_rois() + bbox_targets = self.bbox_head.get_assigned_targets() + # Mask Head needs bbox_feat in Mask RCNN + mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs, + bbox_targets, bbox_feat) + return rpn_loss, bbox_loss, mask_loss else: - # Proposal RoI for Mask branch - # bboxes update at training stage only - bbox_targets = self.proposal.get_targets()[0] - self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois, - bbox_targets) - - # Mask Head - self.mask_head_out = self.mask_head( - self.inputs, body_feats, self.bboxes, bbox_feat, - rois_has_mask_int32, spatial_scale, bbox_head_feat_func) + rois, rois_num, _ = self.rpn_head(body_feats, self.inputs) + preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None) - def get_loss(self, ): - loss = {} - - # RPN loss - rpn_loss_inputs = self.anchor.generate_loss_inputs( - self.inputs, self.rpn_head_out, self.anchor_out) - loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs) - loss.update(loss_rpn) + im_shape = self.inputs['im_shape'] + scale_factor = self.inputs['scale_factor'] - # BBox loss - bbox_targets = self.proposal.get_targets() - loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets) - loss.update(loss_bbox) + bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num), + im_shape, scale_factor) + mask_out = self.mask_head( + body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func) - # Mask loss - mask_targets = self.mask.get_targets() - loss_mask = self.mask_head.get_loss(self.mask_head_out, mask_targets) - loss.update(loss_mask) + # rescale the prediction back to origin image + bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num, + im_shape, scale_factor) + origin_shape = self.bbox_post_process.get_origin_shape() + mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred, + bbox_num, origin_shape) + return bbox_pred, bbox_num, mask_pred + def get_loss(self, ): + bbox_loss, mask_loss, rpn_loss = self._forward() + loss = {} + loss.update(rpn_loss) + loss.update(bbox_loss) + loss.update(mask_loss) total_loss = paddle.add_n(list(loss.values())) loss.update({'loss': total_loss}) return loss def get_pred(self): - bbox, bbox_num = self.bboxes + bbox_pred, bbox_num, mask_pred = self._forward() + label = bbox_pred[:, 0] + score = bbox_pred[:, 1] + bbox = bbox_pred[:, 2:] output = { + 'label': label, + 'score': score, 'bbox': bbox, 'bbox_num': bbox_num, - 'mask': self.mask_head_out + 
'mask': mask_pred, } return output diff --git a/dygraph/ppdet/modeling/architectures/meta_arch.py b/dygraph/ppdet/modeling/architectures/meta_arch.py index a58f33026d17c9708e45c355d67c5d9e9e45cf32..a82292a213e3ee9be81ebfab5c4dbdc722f4a79e 100644 --- a/dygraph/ppdet/modeling/architectures/meta_arch.py +++ b/dygraph/ppdet/modeling/architectures/meta_arch.py @@ -31,8 +31,8 @@ class BaseArch(nn.Layer): inputs[k] = data[i] return inputs - def model_arch(self): - raise NotImplementedError("Should implement model_arch method!") + def model_arch(self, ): + pass def get_loss(self, ): raise NotImplementedError("Should implement get_loss method!") diff --git a/dygraph/ppdet/modeling/backbones/resnet.py b/dygraph/ppdet/modeling/backbones/resnet.py index ad466cde3745256fcbe56defe688167de32af665..9a9e2181bb5a4a42c0b68c11a660df506e375a37 100755 --- a/dygraph/ppdet/modeling/backbones/resnet.py +++ b/dygraph/ppdet/modeling/backbones/resnet.py @@ -13,15 +13,16 @@ # limitations under the License. import math +from numbers import Integral + import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddle import ParamAttr from ppdet.core.workspace import register, serializable from paddle.regularizer import L2Decay -from .name_adapter import NameAdapter -from numbers import Integral from ppdet.modeling.layers import DeformableConvV2 +from .name_adapter import NameAdapter +from ..shape_spec import ShapeSpec __all__ = ['ResNet', 'Res5Head'] @@ -62,7 +63,7 @@ class ConvNormLayer(nn.Layer): stride=stride, padding=(filter_size - 1) // 2, groups=groups, - weight_attr=ParamAttr( + weight_attr=paddle.ParamAttr( learning_rate=lr, name=name + "_weights"), bias_attr=False) else: @@ -73,19 +74,19 @@ class ConvNormLayer(nn.Layer): stride=stride, padding=(filter_size - 1) // 2, groups=groups, - weight_attr=ParamAttr( - learning_rate=lr, name=name + "_weights"), + weight_attr=paddle.ParamAttr( + learning_rate=lr, name=name + '_weights'), bias_attr=False, name=name) bn_name = name_adapter.fix_conv_norm_name(name) norm_lr = 0. 
if freeze_norm else lr - param_attr = ParamAttr( + param_attr = paddle.ParamAttr( learning_rate=norm_lr, regularizer=L2Decay(norm_decay), name=bn_name + "_scale", trainable=False if freeze_norm else True) - bias_attr = ParamAttr( + bias_attr = paddle.ParamAttr( learning_rate=norm_lr, regularizer=L2Decay(norm_decay), name=bn_name + "_offset", @@ -483,10 +484,12 @@ class ResNet(nn.Layer): lr=1.0, name=_name)) - self.pool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - ch_in_list = [64, 256, 512, 1024] ch_out_list = [64, 128, 256, 512] + self.expansion = 4 if depth >= 50 else 1 + + self._out_channels = [self.expansion * v for v in ch_out_list] + self._out_strides = [4, 8, 16, 32] self.res_layers = [] for i in range(num_stages): @@ -514,10 +517,18 @@ class ResNet(nn.Layer): dcn_v2=(i in self.dcn_v2_stages))) self.res_layers.append(res_layer) + @property + def out_shape(self): + return [ + ShapeSpec( + channels=self._out_channels[i], stride=self._out_strides[i]) + for i in self.return_idx + ] + def forward(self, inputs): x = inputs['image'] conv1 = self.conv1(x) - x = self.pool(conv1) + x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1) outs = [] for idx, stage in enumerate(self.res_layers): x = stage(x) @@ -530,16 +541,24 @@ class ResNet(nn.Layer): @register class Res5Head(nn.Layer): - def __init__(self, depth=50, feat_in=1024, feat_out=512): + def __init__(self, depth=50): super(Res5Head, self).__init__() + feat_in, feat_out = [1024, 512] + if depth < 50: + feat_in = 256 na = NameAdapter(self) - self.res5_conv = [] self.res5 = self.add_sublayer( 'res5_roi_feat', Blocks( depth, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)) - self.feat_out = feat_out * 4 + self.feat_out = feat_out if depth < 50 else feat_out * 4 + + @property + def out_shape(self): + return [ShapeSpec( + channels=self.feat_out, + stride=32, )] def forward(self, roi_feat, stage=0): y = self.res5(roi_feat) diff --git a/dygraph/ppdet/modeling/bbox.py b/dygraph/ppdet/modeling/bbox.py deleted file mode 100644 index 65892f2c9f309f6d4cc034ba9cd0d5cee952b278..0000000000000000000000000000000000000000 --- a/dygraph/ppdet/modeling/bbox.py +++ /dev/null @@ -1,192 +0,0 @@ -import numpy as np -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -from ppdet.core.workspace import register -from . 
import ops - - -@register -class Anchor(object): - __inject__ = ['anchor_generator', 'anchor_target_generator'] - - def __init__(self, anchor_generator, anchor_target_generator): - super(Anchor, self).__init__() - self.anchor_generator = anchor_generator - self.anchor_target_generator = anchor_target_generator - - def __call__(self, rpn_feats): - anchors = [] - num_level = len(rpn_feats) - for i, rpn_feat in enumerate(rpn_feats): - anchor, var = self.anchor_generator(rpn_feat, i) - anchors.append((anchor, var)) - return anchors - - def _get_target_input(self, rpn_feats, anchors): - rpn_score_list = [] - rpn_delta_list = [] - anchor_list = [] - for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_feats, anchors): - rpn_score = paddle.transpose(rpn_score, perm=[0, 2, 3, 1]) - rpn_delta = paddle.transpose(rpn_delta, perm=[0, 2, 3, 1]) - rpn_score = paddle.reshape(x=rpn_score, shape=(0, -1, 1)) - rpn_delta = paddle.reshape(x=rpn_delta, shape=(0, -1, 4)) - - anchor = paddle.reshape(anchor, shape=(-1, 4)) - var = paddle.reshape(var, shape=(-1, 4)) - rpn_score_list.append(rpn_score) - rpn_delta_list.append(rpn_delta) - anchor_list.append(anchor) - - rpn_scores = paddle.concat(rpn_score_list, axis=1) - rpn_deltas = paddle.concat(rpn_delta_list, axis=1) - anchors = paddle.concat(anchor_list) - return rpn_scores, rpn_deltas, anchors - - def generate_loss_inputs(self, inputs, rpn_head_out, anchors): - if len(rpn_head_out) != len(anchors): - raise ValueError( - "rpn_head_out and anchors should have same length, " - " but received rpn_head_out' length is {} and anchors' " - " length is {}".format(len(rpn_head_out), len(anchors))) - rpn_score, rpn_delta, anchors = self._get_target_input(rpn_head_out, - anchors) - - score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator( - bbox_pred=rpn_delta, - cls_logits=rpn_score, - anchor_box=anchors, - gt_boxes=inputs['gt_bbox'], - is_crowd=inputs['is_crowd'], - im_info=inputs['im_info']) - outs = { - 'rpn_score_pred': score_pred, - 'rpn_score_target': score_tgt, - 'rpn_rois_pred': roi_pred, - 'rpn_rois_target': roi_tgt, - 'rpn_rois_weight': roi_weight - } - return outs - - -@register -class Proposal(object): - __inject__ = ['proposal_generator', 'proposal_target_generator'] - - def __init__(self, proposal_generator, proposal_target_generator): - super(Proposal, self).__init__() - self.proposal_generator = proposal_generator - self.proposal_target_generator = proposal_target_generator - - def generate_proposal(self, inputs, rpn_head_out, anchor_out, is_train): - # TODO: delete im_info - try: - im_shape = inputs['im_info'] - except: - im_shape = inputs['im_shape'] - rpn_rois_list = [] - rpn_prob_list = [] - rpn_rois_num_list = [] - for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_head_out, - anchor_out): - rpn_prob = F.sigmoid(rpn_score) - rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = self.proposal_generator( - scores=rpn_prob, - bbox_deltas=rpn_delta, - anchors=anchor, - variances=var, - im_shape=im_shape, - is_train=is_train) - if len(rpn_head_out) == 1: - return rpn_rois, rpn_rois_num - rpn_rois_list.append(rpn_rois) - rpn_prob_list.append(rpn_rois_prob) - rpn_rois_num_list.append(rpn_rois_num) - - start_level = 2 - end_level = start_level + len(rpn_head_out) - rois_collect, rois_num_collect = ops.collect_fpn_proposals( - rpn_rois_list, - rpn_prob_list, - start_level, - end_level, - post_nms_top_n, - rois_num_per_level=rpn_rois_num_list) - return rois_collect, rois_num_collect - - def generate_proposal_target(self, - 
inputs, - rois, - rois_num, - stage=0, - max_overlap=None): - outs = self.proposal_target_generator( - rpn_rois=rois, - rpn_rois_num=rois_num, - gt_classes=inputs['gt_class'], - is_crowd=inputs['is_crowd'], - gt_boxes=inputs['gt_bbox'], - im_info=inputs['im_info'], - stage=stage, - max_overlap=max_overlap) - rois = outs[0] - max_overlap = outs[-1] - rois_num = outs[-2] - targets = { - 'labels_int32': outs[1], - 'bbox_targets': outs[2], - 'bbox_inside_weights': outs[3], - 'bbox_outside_weights': outs[4] - } - return rois, rois_num, targets, max_overlap - - def refine_bbox(self, roi, bbox_delta, stage=1): - out_dim = bbox_delta.shape[1] // 4 - bbox_delta_r = paddle.reshape(bbox_delta, (-1, out_dim, 4)) - bbox_delta_s = paddle.slice( - bbox_delta_r, axes=[1], starts=[1], ends=[2]) - - reg_weights = [ - i / stage for i in self.proposal_target_generator.bbox_reg_weights - ] - refined_bbox = ops.box_coder( - prior_box=roi, - prior_box_var=reg_weights, - target_box=bbox_delta_s, - code_type='decode_center_size', - box_normalized=False, - axis=1) - refined_bbox = paddle.reshape(refined_bbox, shape=[-1, 4]) - return refined_bbox - - def __call__(self, - inputs, - rpn_head_out, - anchor_out, - is_train=False, - stage=0, - proposal_out=None, - bbox_head_out=None, - max_overlap=None): - if stage == 0: - roi, rois_num = self.generate_proposal(inputs, rpn_head_out, - anchor_out, is_train) - self.targets_list = [] - self.max_overlap = None - - else: - bbox_delta = bbox_head_out[1] - roi = self.refine_bbox(proposal_out[0], bbox_delta, stage) - rois_num = proposal_out[1] - if is_train: - roi, rois_num, targets, self.max_overlap = self.generate_proposal_target( - inputs, roi, rois_num, stage, self.max_overlap) - self.targets_list.append(targets) - return roi, rois_num - - def get_targets(self): - return self.targets_list - - def get_max_overlap(self): - return self.max_overlap diff --git a/dygraph/ppdet/modeling/bbox_utils.py b/dygraph/ppdet/modeling/bbox_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..51f963a60394f2cb2169c2048b55f923095a0615 --- /dev/null +++ b/dygraph/ppdet/modeling/bbox_utils.py @@ -0,0 +1,129 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
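The helpers added below implement the standard R-CNN box parameterization: a target box is encoded against a source (anchor or proposal) box as center offsets normalized by the source size plus log size ratios, and delta2bbox inverts that encoding. A small worked sketch of bbox2delta on plain numbers, with values chosen only for illustration:

import math

src = (0., 0., 10., 10.)           # w=10, h=10, center (5, 5)
tgt = (2., 2., 14., 10.)           # w=12, h=8,  center (8, 6)
wx, wy, ww, wh = 10., 10., 5., 5.  # the bbox_weight default used by BBoxHead

dx = wx * (8. - 5.) / 10.          # 3.0
dy = wy * (6. - 5.) / 10.          # 1.0
dw = ww * math.log(12. / 10.)      # ~0.91
dh = wh * math.log(8. / 10.)       # ~-1.12
# delta2bbox applies the inverse, clipping dw/dh at log(1000/16)
# before exponentiating so predicted box sizes cannot blow up.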
+
+import math
+import paddle
+
+
+def bbox2delta(src_boxes, tgt_boxes, weights):
+    src_w = src_boxes[:, 2] - src_boxes[:, 0]
+    src_h = src_boxes[:, 3] - src_boxes[:, 1]
+    src_ctr_x = src_boxes[:, 0] + 0.5 * src_w
+    src_ctr_y = src_boxes[:, 1] + 0.5 * src_h
+
+    tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0]
+    tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1]
+    tgt_ctr_x = tgt_boxes[:, 0] + 0.5 * tgt_w
+    tgt_ctr_y = tgt_boxes[:, 1] + 0.5 * tgt_h
+
+    wx, wy, ww, wh = weights
+    dx = wx * (tgt_ctr_x - src_ctr_x) / src_w
+    dy = wy * (tgt_ctr_y - src_ctr_y) / src_h
+    dw = ww * paddle.log(tgt_w / src_w)
+    dh = wh * paddle.log(tgt_h / src_h)
+
+    deltas = paddle.stack((dx, dy, dw, dh), axis=1)
+    return deltas
+
+
+def delta2bbox(deltas, boxes, weights):
+    clip_scale = math.log(1000.0 / 16)
+    if boxes.shape[0] == 0:
+        return paddle.zeros((0, deltas.shape[1]), dtype='float32')
+
+    widths = boxes[:, 2] - boxes[:, 0]
+    heights = boxes[:, 3] - boxes[:, 1]
+    ctr_x = boxes[:, 0] + 0.5 * widths
+    ctr_y = boxes[:, 1] + 0.5 * heights
+
+    wx, wy, ww, wh = weights
+    dx = deltas[:, 0::4] / wx
+    dy = deltas[:, 1::4] / wy
+    dw = deltas[:, 2::4] / ww
+    dh = deltas[:, 3::4] / wh
+    # Prevent sending too large values into paddle.exp()
+    dw = paddle.clip(dw, max=clip_scale)
+    dh = paddle.clip(dh, max=clip_scale)
+
+    pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1)
+    pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1)
+    pred_w = paddle.exp(dw) * widths.unsqueeze(1)
+    pred_h = paddle.exp(dh) * heights.unsqueeze(1)
+
+    pred_boxes = paddle.zeros_like(deltas)
+
+    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
+    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
+    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
+    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
+    return pred_boxes
+
+
+def expand_bbox(bboxes, scale):
+    w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5
+    h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5
+    x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5
+    y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5
+
+    w_half *= scale
+    h_half *= scale
+
+    bboxes_exp = paddle.zeros(bboxes.shape, dtype='float32')
+    bboxes_exp[:, 0] = x_c - w_half
+    bboxes_exp[:, 2] = x_c + w_half
+    bboxes_exp[:, 1] = y_c - h_half
+    bboxes_exp[:, 3] = y_c + h_half
+
+    return bboxes_exp
+
+
+def clip_bbox(boxes, im_shape):
+    h, w = im_shape
+    x1 = boxes[:, 0].clip(0, w)
+    y1 = boxes[:, 1].clip(0, h)
+    x2 = boxes[:, 2].clip(0, w)
+    y2 = boxes[:, 3].clip(0, h)
+    return paddle.stack([x1, y1, x2, y2], axis=1)
+
+
+def nonempty_bbox(boxes, min_size=0, return_mask=False):
+    w = boxes[:, 2] - boxes[:, 0]
+    h = boxes[:, 3] - boxes[:, 1]
+    mask = paddle.logical_and(w > min_size, h > min_size)
+    if return_mask:
+        return mask
+    keep = paddle.nonzero(mask).flatten()
+    return keep
+
+
+def bbox_area(boxes):
+    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+
+
+def bbox_overlaps(boxes1, boxes2):
+    area1 = bbox_area(boxes1)
+    area2 = bbox_area(boxes2)
+
+    xy_max = paddle.minimum(
+        paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:])
+    xy_min = paddle.maximum(
+        paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2])
+    width_height = xy_max - xy_min
+    width_height = width_height.clip(min=0)
+    inter = width_height.prod(axis=2)
+
+    overlaps = paddle.where(inter > 0, inter /
+                            (paddle.unsqueeze(area1, 1) + area2 - inter),
+                            paddle.zeros_like(inter))
+    return overlaps
diff --git a/dygraph/ppdet/modeling/heads/__init__.py b/dygraph/ppdet/modeling/heads/__init__.py
index 3bd98f554bdf0b82af870aa0d73c7bc5725cba88..14b587700d8adaf4b08fced34d48263bd36ec5b0 100644
---
a/dygraph/ppdet/modeling/heads/__init__.py +++ b/dygraph/ppdet/modeling/heads/__init__.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import rpn_head from . import bbox_head from . import mask_head from . import yolo_head @@ -22,7 +21,6 @@ from . import fcos_head from . import solov2_head from . import ttf_head -from .rpn_head import * from .bbox_head import * from .mask_head import * from .yolo_head import * diff --git a/dygraph/ppdet/modeling/heads/bbox_head.py b/dygraph/ppdet/modeling/heads/bbox_head.py index d3cdb73bd90a237cfc8951e83babe2300c5c7c87..a2af5f30344c58b87ee359b65075b962aefe2e53 100644 --- a/dygraph/ppdet/modeling/heads/bbox_head.py +++ b/dygraph/ppdet/modeling/heads/bbox_head.py @@ -13,234 +13,216 @@ # limitations under the License. import paddle -from paddle import ParamAttr import paddle.nn as nn import paddle.nn.functional as F -from paddle.nn import ReLU from paddle.nn.initializer import Normal, XavierUniform from paddle.regularizer import L2Decay -from ppdet.core.workspace import register + +from ppdet.core.workspace import register, create from ppdet.modeling import ops +from .roi_extractor import RoIAlign +from ..shape_spec import ShapeSpec +from ..bbox_utils import bbox2delta + @register class TwoFCHead(nn.Layer): - - __shared__ = ['roi_stages'] - - def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, roi_stages=1): + def __init__(self, in_dim=256, mlp_dim=1024, resolution=7): super(TwoFCHead, self).__init__() self.in_dim = in_dim self.mlp_dim = mlp_dim - self.roi_stages = roi_stages fan = in_dim * resolution * resolution - self.fc6_list = [] - self.fc6_relu_list = [] - self.fc7_list = [] - self.fc7_relu_list = [] - for stage in range(roi_stages): - fc6_name = 'fc6_{}'.format(stage) - fc7_name = 'fc7_{}'.format(stage) - lr_factor = 2**stage - fc6 = self.add_sublayer( - fc6_name, - nn.Linear( - in_dim * resolution * resolution, - mlp_dim, - weight_attr=ParamAttr( - learning_rate=lr_factor, - initializer=XavierUniform(fan_out=fan)), - bias_attr=ParamAttr( - learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) - fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU()) - fc7 = self.add_sublayer( - fc7_name, - nn.Linear( - mlp_dim, - mlp_dim, - weight_attr=ParamAttr( - learning_rate=lr_factor, initializer=XavierUniform()), - bias_attr=ParamAttr( - learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) - fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU()) - self.fc6_list.append(fc6) - self.fc6_relu_list.append(fc6_relu) - self.fc7_list.append(fc7) - self.fc7_relu_list.append(fc7_relu) - - def forward(self, rois_feat, stage=0): + lr_factor = 1. 
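# The two fc layers set up below flatten each pooled RoI feature
# (in_dim * resolution * resolution, e.g. 256 * 7 * 7 = 12544 with the
# defaults above) and project it to mlp_dim twice, with ReLU applied in
# forward(); `fan` feeds XavierUniform so the initialization scales with
# the flattened input size.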
+ self.fc6 = nn.Linear( + in_dim * resolution * resolution, + mlp_dim, + weight_attr=paddle.ParamAttr( + learning_rate=lr_factor, + initializer=XavierUniform(fan_out=fan))) + + self.fc7 = nn.Linear( + mlp_dim, + mlp_dim, + weight_attr=paddle.ParamAttr( + learning_rate=lr_factor, initializer=XavierUniform())) + + @classmethod + def from_config(cls, cfg, input_shape): + s = input_shape + s = s[0] if isinstance(s, (list, tuple)) else s + return {'in_dim': s.channels} + + @property + def out_shape(self): + return [ShapeSpec(channels=self.mlp_dim, )] + + def forward(self, rois_feat): rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1) - fc6 = self.fc6_list[stage](rois_feat) - fc6_relu = self.fc6_relu_list[stage](fc6) - fc7 = self.fc7_list[stage](fc6_relu) - fc7_relu = self.fc7_relu_list[stage](fc7) - return fc7_relu - - -@register -class BBoxFeat(nn.Layer): - __inject__ = ['roi_extractor', 'head_feat'] - - def __init__(self, roi_extractor, head_feat): - super(BBoxFeat, self).__init__() - self.roi_extractor = roi_extractor - self.head_feat = head_feat - self.rois_feat_list = [] - - def forward(self, body_feats, rois, spatial_scale, stage=0): - rois_feat = self.roi_extractor(body_feats, rois, spatial_scale) - bbox_feat = self.head_feat(rois_feat, stage) - return rois_feat, bbox_feat + fc6 = self.fc6(rois_feat) + fc6 = F.relu(fc6) + fc7 = self.fc7(fc6) + fc7 = F.relu(fc7) + return fc7 @register class BBoxHead(nn.Layer): - __shared__ = ['num_classes', 'roi_stages'] - __inject__ = ['bbox_feat'] + __shared__ = ['num_classes'] + __inject__ = ['bbox_assigner'] + """ + head (nn.Layer): Extract feature in bbox head + in_channel (int): Input channel after RoI extractor + """ def __init__(self, - bbox_feat, - in_feat=1024, - num_classes=81, - cls_agnostic=False, - roi_stages=1, + head, + in_channel, + roi_extractor=RoIAlign().__dict__, + bbox_assigner='BboxAssigner', with_pool=False, - score_stage=[0, 1, 2], - delta_stage=[2]): + num_classes=80, + bbox_weight=[10., 10., 5., 5.]): super(BBoxHead, self).__init__() - self.num_classes = num_classes - self.cls_agnostic = cls_agnostic - self.delta_dim = 2 if cls_agnostic else num_classes - self.bbox_feat = bbox_feat - self.roi_stages = roi_stages - self.bbox_score_list = [] - self.bbox_delta_list = [] - self.roi_feat_list = [[] for i in range(roi_stages)] + self.head = head + self.roi_extractor = roi_extractor + if isinstance(roi_extractor, dict): + self.roi_extractor = RoIAlign(**roi_extractor) + self.bbox_assigner = bbox_assigner + self.with_pool = with_pool - self.score_stage = score_stage - self.delta_stage = delta_stage - for stage in range(roi_stages): - score_name = 'bbox_score_{}'.format(stage) - delta_name = 'bbox_delta_{}'.format(stage) - lr_factor = 2**stage - bbox_score = self.add_sublayer( - score_name, - nn.Linear( - in_feat, - 1 * self.num_classes, - weight_attr=ParamAttr( - learning_rate=lr_factor, - initializer=Normal( - mean=0.0, std=0.01)), - bias_attr=ParamAttr( - learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) - - bbox_delta = self.add_sublayer( - delta_name, - nn.Linear( - in_feat, - 4 * self.delta_dim, - weight_attr=ParamAttr( - learning_rate=lr_factor, - initializer=Normal( - mean=0.0, std=0.001)), - bias_attr=ParamAttr( - learning_rate=2. 
* lr_factor, regularizer=L2Decay(0.)))) - self.bbox_score_list.append(bbox_score) - self.bbox_delta_list.append(bbox_delta) - - def forward(self, - body_feats=None, - rois=None, - spatial_scale=None, - stage=0, - roi_stage=-1): - if rois is not None: - rois_feat, bbox_feat = self.bbox_feat(body_feats, rois, - spatial_scale, stage) - self.roi_feat_list[stage] = rois_feat + self.num_classes = num_classes + self.bbox_weight = bbox_weight + + lr_factor = 1. + self.bbox_score = nn.Linear( + in_channel, + self.num_classes + 1, + weight_attr=paddle.ParamAttr( + learning_rate=lr_factor, initializer=Normal( + mean=0.0, std=0.01))) + + self.bbox_delta = nn.Linear( + in_channel, + 4 * self.num_classes, + weight_attr=paddle.ParamAttr( + learning_rate=lr_factor, + initializer=Normal( + mean=0.0, std=0.001))) + self.assigned_label = None + self.assigned_rois = None + + @classmethod + def from_config(cls, cfg, input_shape): + roi_pooler = cfg['roi_extractor'] + assert isinstance(roi_pooler, dict) + kwargs = RoIAlign.from_config(cfg, input_shape) + roi_pooler.update(kwargs) + kwargs = {'input_shape': input_shape} + head = create(cfg['head'], **kwargs) + return { + 'roi_extractor': roi_pooler, + 'head': head, + 'in_channel': head.out_shape[0].channels + } + + def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None): + """ + body_feats (list[Tensor]): + rois (Tensor): + rois_num (Tensor): + inputs (dict{Tensor}): + """ + if self.training: + rois, rois_num, _, targets = self.bbox_assigner(rois, rois_num, + inputs) + self.assigned_rois = (rois, rois_num) + self.assigned_targets = targets + + rois_feat = self.roi_extractor(body_feats, rois, rois_num) + bbox_feat = self.head(rois_feat) + #if self.with_pool: + if len(bbox_feat.shape) > 2 and bbox_feat.shape[-1] > 1: + feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1) + feat = paddle.squeeze(feat, axis=[2, 3]) else: - rois_feat = self.roi_feat_list[roi_stage] - bbox_feat = self.bbox_feat.head_feat(rois_feat, stage) - if self.with_pool: - bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1) - bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3]) - scores = self.bbox_score_list[stage](bbox_feat_) - deltas = self.bbox_delta_list[stage](bbox_feat_) + feat = bbox_feat + scores = self.bbox_score(feat) + deltas = self.bbox_delta(feat) + + if self.training: + loss = self.get_loss(scores, deltas, targets, rois) + return loss, bbox_feat else: - scores = self.bbox_score_list[stage](bbox_feat) - deltas = self.bbox_delta_list[stage](bbox_feat) - bbox_head_out = (scores, deltas) - return bbox_feat, bbox_head_out, self.bbox_feat.head_feat - - def _get_head_loss(self, score, delta, target): - # bbox cls - labels_int64 = paddle.cast(x=target['labels_int32'], dtype='int64') - labels_int64.stop_gradient = True - loss_bbox_cls = F.softmax_with_cross_entropy( - logits=score, label=labels_int64) - loss_bbox_cls = paddle.mean(loss_bbox_cls) + pred = self.get_prediction(scores, deltas) + return pred, self.head + + def get_loss(self, scores, deltas, targets, rois): + """ + scores (Tensor): scores from bbox head outputs + deltas (Tensor): deltas from bbox head outputs + targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds + rois (List[Tensor]): RoIs generated in each batch + """ + # TODO: better pass args + tgt_labels, tgt_bboxes, tgt_gt_inds = targets + tgt_labels = paddle.concat(tgt_labels) if len( + tgt_labels) > 1 else tgt_labels[0] + tgt_labels = tgt_labels.cast('int64') + tgt_labels.stop_gradient = True + 
loss_bbox_cls = F.cross_entropy( + input=scores, label=tgt_labels, reduction='mean') # bbox reg - loss_bbox_reg = ops.smooth_l1( - input=delta, - label=target['bbox_targets'], - inside_weight=target['bbox_inside_weights'], - outside_weight=target['bbox_outside_weights'], - sigma=1.0) - loss_bbox_reg = paddle.mean(loss_bbox_reg) - return loss_bbox_cls, loss_bbox_reg - - def get_loss(self, bbox_head_out, targets): - loss_bbox = {} + + cls_agnostic_bbox_reg = deltas.shape[1] == 4 + + fg_inds = paddle.nonzero( + paddle.logical_and(tgt_labels >= 0, tgt_labels < + self.num_classes)).flatten() + + if cls_agnostic_bbox_reg: + reg_delta = paddle.gather(deltas, fg_inds) + else: + fg_gt_classes = paddle.gather(tgt_labels, fg_inds) + + reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1) + reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1]) + + reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4) + + reg_col_inds = reg_col_inds.reshape([-1, 1]) + reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1) + + reg_delta = paddle.gather(deltas, fg_inds) + reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4]) + rois = paddle.concat(rois) if len(rois) > 1 else rois[0] + tgt_bboxes = paddle.concat(tgt_bboxes) if len( + tgt_bboxes) > 1 else tgt_bboxes[0] + + reg_target = bbox2delta(rois, tgt_bboxes, self.bbox_weight) + reg_target = paddle.gather(reg_target, fg_inds) + reg_target.stop_gradient = True + + loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum( + ) / tgt_labels.shape[0] + cls_name = 'loss_bbox_cls' reg_name = 'loss_bbox_reg' - for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)): - score, delta = bboxhead - if len(targets) > 1: - cls_name = 'loss_bbox_cls_{}'.format(lvl) - reg_name = 'loss_bbox_reg_{}'.format(lvl) - loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta, - target) - loss_weight = 1. 
/ 2**lvl - loss_bbox[cls_name] = loss_bbox_cls * loss_weight - loss_bbox[reg_name] = loss_bbox_reg * loss_weight + loss_bbox = {} + loss_bbox[cls_name] = loss_bbox_cls + loss_bbox[reg_name] = loss_bbox_reg + return loss_bbox - def get_prediction(self, bbox_head_out, rois): - proposal, proposal_num = rois - score, delta = bbox_head_out + def get_prediction(self, score, delta): bbox_prob = F.softmax(score) - delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) - bbox_pred = (delta, bbox_prob) - return bbox_pred, rois - - def get_cascade_prediction(self, bbox_head_out, rois): - proposal_list = [] - prob_list = [] - delta_list = [] - for stage in range(len(rois)): - proposals = rois[stage] - bboxhead = bbox_head_out[stage] - score, delta = bboxhead - proposal, proposal_num = proposals - if stage in self.score_stage: - if stage < 2: - _, head_out, _ = self(stage=stage, roi_stage=-1) - score = head_out[0] - - bbox_prob = F.softmax(score) - prob_list.append(bbox_prob) - if stage in self.delta_stage: - proposal_list.append(proposal) - delta_list.append(delta) - bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0) - delta = paddle.mean(paddle.stack(delta_list), axis=0) - proposal = paddle.mean(paddle.stack(proposal_list), axis=0) - delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) - if self.cls_agnostic: - N, C, M = delta.shape - delta = delta[:, 1:2, :] - delta = paddle.expand(delta, [N, self.num_classes, M]) - bboxes = (proposal, proposal_num) - bbox_pred = (delta, bbox_prob) - return bbox_pred, bboxes + return delta, bbox_prob + + def get_head(self, ): + return self.head + + def get_assigned_targets(self, ): + return self.assigned_targets + + def get_assigned_rois(self, ): + return self.assigned_rois diff --git a/dygraph/ppdet/modeling/heads/mask_head.py b/dygraph/ppdet/modeling/heads/mask_head.py index e482ebbe3f65dad4072cd673663786ef798600fd..3ccf23ba846c90aeeb0515cf6f2a96592879626e 100644 --- a/dygraph/ppdet/modeling/heads/mask_head.py +++ b/dygraph/ppdet/modeling/heads/mask_head.py @@ -13,195 +13,196 @@ # limitations under the License. 
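The rewritten MaskHead in this file trains one mask per RoI against its assigned class only: get_loss one-hot-encodes the class labels, uses the resulting mask to gather the matching M x M logit map out of the [R, C, M, M] predictions, and applies weighted binary cross-entropy with logits. A self-contained sketch of that selection step; the shapes and random inputs are illustrative assumptions, not values from the patch:

import paddle
import paddle.nn.functional as F

R, C, M = 4, 80, 28                         # RoIs, classes, mask resolution
mask_logits = paddle.rand([R, C, M, M])     # per-class mask predictions
mask_label = paddle.randint(0, C, [R])      # assigned class per RoI

onehot = F.one_hot(mask_label, C).unsqueeze([2, 3])  # [R, C, 1, 1]
onehot = paddle.expand_as(onehot, mask_logits)       # [R, C, M, M]
# keep only the logits belonging to each RoI's own class
picked = paddle.gather_nd(mask_logits, paddle.nonzero(onehot))
picked = picked.reshape([R, M, M])          # one M x M logit map per RoI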
import paddle +import paddle.nn as nn import paddle.nn.functional as F -from paddle import ParamAttr -from paddle.nn import Layer, Sequential -from paddle.nn import Conv2D, Conv2DTranspose, ReLU from paddle.nn.initializer import KaimingNormal from paddle.regularizer import L2Decay -from ppdet.core.workspace import register + +from ppdet.core.workspace import register, create from ppdet.modeling import ops +from .roi_extractor import RoIAlign -@register -class MaskFeat(Layer): - __inject__ = ['mask_roi_extractor'] - def __init__(self, - mask_roi_extractor=None, - num_convs=0, - feat_in=2048, - feat_out=256, - mask_num_stages=1, - share_bbox_feat=False): +@register +class MaskFeat(nn.Layer): + def __init__(self, num_convs=0, in_channels=2048, out_channels=256): super(MaskFeat, self).__init__() self.num_convs = num_convs - self.feat_in = feat_in - self.feat_out = feat_out - self.mask_roi_extractor = mask_roi_extractor - self.mask_num_stages = mask_num_stages - self.share_bbox_feat = share_bbox_feat - self.upsample_module = [] - fan_conv = feat_out * 3 * 3 - fan_deconv = feat_out * 2 * 2 - for i in range(self.mask_num_stages): - name = 'stage_{}'.format(i) - mask_conv = Sequential() - for j in range(self.num_convs): - conv_name = 'mask_inter_feat_{}'.format(j + 1) - mask_conv.add_sublayer( - conv_name, - Conv2D( - in_channels=feat_in if j == 0 else feat_out, - out_channels=feat_out, - kernel_size=3, - padding=1, - weight_attr=ParamAttr( - initializer=KaimingNormal(fan_in=fan_conv)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) - mask_conv.add_sublayer(conv_name + 'act', ReLU()) + self.in_channels = in_channels + self.out_channels = out_channels + fan_conv = out_channels * 3 * 3 + fan_deconv = out_channels * 2 * 2 + + mask_conv = nn.Sequential() + for i in range(self.num_convs): + conv_name = 'mask_inter_feat_{}'.format(i + 1) mask_conv.add_sublayer( - 'conv5_mask', - Conv2DTranspose( - in_channels=self.feat_in, - out_channels=self.feat_out, - kernel_size=2, - stride=2, - weight_attr=ParamAttr( - initializer=KaimingNormal(fan_in=fan_deconv)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) - mask_conv.add_sublayer('conv5_mask' + 'act', ReLU()) - upsample = self.add_sublayer(name, mask_conv) - self.upsample_module.append(upsample) + conv_name, + nn.Conv2D( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + weight_attr=paddle.ParamAttr( + initializer=KaimingNormal(fan_in=fan_conv)))) + mask_conv.add_sublayer(conv_name + 'act', nn.ReLU()) + mask_conv.add_sublayer( + 'conv5_mask', + nn.Conv2DTranspose( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=2, + stride=2, + weight_attr=paddle.ParamAttr( + initializer=KaimingNormal(fan_in=fan_deconv)))) + mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU()) + self.upsample = mask_conv - def forward(self, - body_feats, - bboxes, - bbox_feat, - mask_index, - spatial_scale, - stage=0, - bbox_head_feat_func=None): - if self.share_bbox_feat and mask_index is not None: - rois_feat = paddle.gather(bbox_feat, mask_index) - else: - rois_feat = self.mask_roi_extractor(body_feats, bboxes, - spatial_scale) - if self.share_bbox_feat and bbox_head_feat_func is not None and not self.training: - rois_feat = bbox_head_feat_func(rois_feat) + @classmethod + def from_config(cls, cfg, input_shape): + if isinstance(input_shape, (list, tuple)): + input_shape = input_shape[0] + return {'in_channels': input_shape.channels, } - # upsample 
- mask_feat = self.upsample_module[stage](rois_feat) - return mask_feat + def out_channel(self): + return self.out_channels + + def forward(self, feats): + return self.upsample(feats) @register -class MaskHead(Layer): - __shared__ = ['num_classes', 'mask_num_stages'] - __inject__ = ['mask_feat'] +class MaskHead(nn.Layer): + __shared__ = ['num_classes'] + __inject__ = ['mask_assigner'] def __init__(self, - mask_feat, - feat_in=256, - num_classes=81, - mask_num_stages=1): + head, + roi_extractor=RoIAlign().__dict__, + mask_assigner='MaskAssigner', + num_classes=80, + share_bbox_feat=False): super(MaskHead, self).__init__() - self.mask_feat = mask_feat - self.feat_in = feat_in self.num_classes = num_classes - self.mask_num_stages = mask_num_stages - self.mask_fcn_logits = [] - for i in range(self.mask_num_stages): - name = 'mask_fcn_logits_{}'.format(i) - self.mask_fcn_logits.append( - self.add_sublayer( - name, - Conv2D( - in_channels=self.feat_in, - out_channels=self.num_classes, - kernel_size=1, - weight_attr=ParamAttr(initializer=KaimingNormal( - fan_in=self.num_classes)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.0))))) - - def forward_train(self, - body_feats, - bboxes, - bbox_feat, - mask_index, - spatial_scale, - stage=0): - # feat - mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index, - spatial_scale, stage) - # logits - mask_head_out = self.mask_fcn_logits[stage](mask_feat) - return mask_head_out + + self.roi_extractor = roi_extractor + if isinstance(roi_extractor, dict): + self.roi_extractor = RoIAlign(**roi_extractor) + self.head = head + self.in_channels = head.out_channel() + self.mask_assigner = mask_assigner + self.share_bbox_feat = share_bbox_feat + self.bbox_head = None + + self.mask_fcn_logits = nn.Conv2D( + in_channels=self.in_channels, + out_channels=self.num_classes, + kernel_size=1, + weight_attr=paddle.ParamAttr(initializer=KaimingNormal( + fan_in=self.num_classes))) + + @classmethod + def from_config(cls, cfg, input_shape): + roi_pooler = cfg['roi_extractor'] + assert isinstance(roi_pooler, dict) + kwargs = RoIAlign.from_config(cfg, input_shape) + roi_pooler.update(kwargs) + kwargs = {'input_shape': input_shape} + head = create(cfg['head'], **kwargs) + return { + 'roi_extractor': roi_pooler, + 'head': head, + } + + def get_loss(self, mask_logits, mask_label, mask_target, mask_weight): + mask_label = F.one_hot(mask_label, self.num_classes).unsqueeze([2, 3]) + mask_label = paddle.expand_as(mask_label, mask_logits) + mask_label.stop_gradient = True + mask_pred = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label)) + shape = mask_logits.shape + mask_pred = paddle.reshape(mask_pred, [shape[0], shape[2], shape[3]]) + + mask_target = mask_target.cast('float32') + mask_weight = mask_weight.unsqueeze([1, 2]) + loss_mask = F.binary_cross_entropy_with_logits( + mask_pred, mask_target, weight=mask_weight, reduction="mean") + return loss_mask + + def forward_train(self, body_feats, rois, rois_num, inputs, targets, + bbox_feat): + """ + body_feats (list[Tensor]): Multi-level backbone features + rois (list[Tensor]): Proposals for each batch with shape [N, 4] + rois_num (Tensor): The number of proposals for each batch + inputs (dict): ground truth info + """ + #assert self.bbox_head + tgt_labels, _, tgt_gt_inds = targets + rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner( + rois, tgt_labels, tgt_gt_inds, inputs) + + if self.share_bbox_feat: + rois_feat = paddle.gather(bbox_feat, mask_index) + else: + 
rois_feat = self.roi_extractor(body_feats, rois, rois_num) + mask_feat = self.head(rois_feat) + mask_logits = self.mask_fcn_logits(mask_feat) + + loss_mask = self.get_loss(mask_logits, tgt_classes, tgt_masks, + tgt_weights) + return {'loss_mask': loss_mask} def forward_test(self, - scale_factor, body_feats, - bboxes, - bbox_feat, - mask_index, - spatial_scale, - stage=0, - bbox_head_feat_func=None): - bbox, bbox_num = bboxes - - if bbox.shape[0] == 0: - mask_head_out = paddle.full([1, 6], -1) + rois, + rois_num, + scale_factor, + feat_func=None): + """ + body_feats (list[Tensor]): Multi-level backbone features + rois (Tensor): Prediction from bbox head with shape [N, 6] + rois_num (Tensor): The number of prediction for each batch + scale_factor (Tensor): The scale factor from origin size to input size + """ + if rois.shape[0] == 0: + mask_out = paddle.full([1, 1, 1, 1], -1) else: - scale_factor_list = [] - for idx in range(bbox_num.shape[0]): - num = bbox_num[idx] - scale = scale_factor[idx, 0] - ones = paddle.ones(num) - scale_expand = ones * scale - scale_factor_list.append(scale_expand) - scale_factor_list = paddle.cast( - paddle.concat(scale_factor_list), 'float32') - scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1]) - scaled_bbox = paddle.multiply(bbox[:, 2:], scale_factor_list) - scaled_bboxes = (scaled_bbox, bbox_num) - mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat, - mask_index, spatial_scale, stage, - bbox_head_feat_func) - mask_logit = self.mask_fcn_logits[stage](mask_feat) - mask_head_out = F.sigmoid(mask_logit) - return mask_head_out + bbox = [rois[:, 2:]] + labels = rois[:, 0].cast('int32') + rois_feat = self.roi_extractor(body_feats, bbox, rois_num) + if self.share_bbox_feat: + assert feat_func is not None + rois_feat = feat_func(rois_feat) + + mask_feat = self.head(rois_feat) + mask_logit = self.mask_fcn_logits(mask_feat) + mask_num_class = mask_logit.shape[1] + if mask_num_class == 1: + mask_out = F.sigmoid(mask_logit) + else: + num_masks = mask_logit.shape[0] + pred_masks = paddle.split(mask_logit, num_masks) + mask_out = [] + # TODO: need to optimize gather + for i, pred_mask in enumerate(pred_masks): + mask = paddle.gather(pred_mask, labels[i], axis=1) + mask_out.append(mask) + mask_out = F.sigmoid(paddle.concat(mask_out)) + return mask_out def forward(self, - inputs, body_feats, - bboxes, - bbox_feat, - mask_index, - spatial_scale, - bbox_head_feat_func=None, - stage=0): + rois, + rois_num, + inputs, + targets=None, + bbox_feat=None, + feat_func=None): if self.training: - mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat, - mask_index, spatial_scale, stage) + return self.forward_train(body_feats, rois, rois_num, inputs, + targets, bbox_feat) else: - scale_factor = inputs['scale_factor'] - mask_head_out = self.forward_test( - scale_factor, body_feats, bboxes, bbox_feat, mask_index, - spatial_scale, stage, bbox_head_feat_func) - return mask_head_out - - def get_loss(self, mask_head_out, mask_target): - mask_logits = paddle.flatten(mask_head_out, start_axis=1, stop_axis=-1) - mask_label = paddle.cast(x=mask_target, dtype='float32') - mask_label.stop_gradient = True - loss_mask = ops.sigmoid_cross_entropy_with_logits( - input=mask_logits, - label=mask_label, - ignore_index=-1, - normalize=True) - loss_mask = paddle.sum(loss_mask) - - return {'loss_mask': loss_mask} + im_scale = inputs['scale_factor'] + return self.forward_test(body_feats, rois, rois_num, im_scale, + feat_func) diff --git 
a/dygraph/ppdet/modeling/heads/roi_extractor.py b/dygraph/ppdet/modeling/heads/roi_extractor.py index 43121370ad3a8a2c1d813e5051c28f9f227eb0dc..abdeb016326eb59c05987f1229e5a44d6a6b78fa 100644 --- a/dygraph/ppdet/modeling/heads/roi_extractor.py +++ b/dygraph/ppdet/modeling/heads/roi_extractor.py @@ -17,32 +17,47 @@ from ppdet.core.workspace import register from ppdet.modeling import ops +def _to_list(v): + if not isinstance(v, (list, tuple)): + return [v] + return v + + @register class RoIAlign(object): def __init__(self, resolution=14, + spatial_scale=0.0625, sampling_ratio=0, canconical_level=4, canonical_size=224, start_level=0, - end_level=3): + end_level=3, + aligned=False): super(RoIAlign, self).__init__() self.resolution = resolution + self.spatial_scale = _to_list(spatial_scale) self.sampling_ratio = sampling_ratio self.canconical_level = canconical_level self.canonical_size = canonical_size self.start_level = start_level self.end_level = end_level + self.aligned = aligned - def __call__(self, feats, rois, spatial_scale): - roi, rois_num = rois - if self.start_level == self.end_level: + @classmethod + def from_config(cls, cfg, input_shape): + return {'spatial_scale': [1. / i.stride for i in input_shape]} + + def __call__(self, feats, roi, rois_num): + roi = paddle.concat(roi) if len(roi) > 1 else roi[0] + if len(feats) == 1: rois_feat = ops.roi_align( feats[self.start_level], roi, self.resolution, - spatial_scale, - rois_num=rois_num) + self.spatial_scale[0], + rois_num=rois_num, + aligned=self.aligned) else: offset = 2 k_min = self.start_level + offset @@ -60,10 +75,12 @@ class RoIAlign(object): feats[lvl], rois_dist[lvl], self.resolution, - spatial_scale[lvl], + self.spatial_scale[lvl], sampling_ratio=self.sampling_ratio, - rois_num=rois_num_dist[lvl]) - rois_feat_list.append(roi_feat) + rois_num=rois_num_dist[lvl], + aligned=self.aligned) + if roi_feat.shape[0] > 0: + rois_feat_list.append(roi_feat) rois_feat_shuffle = paddle.concat(rois_feat_list) rois_feat = paddle.gather(rois_feat_shuffle, restore_index) diff --git a/dygraph/ppdet/modeling/layers.py b/dygraph/ppdet/modeling/layers.py index 74e1a4152da699f0b49d2d39bcc5bb21a13f5239..4af24110fc6e2c3a633bb52c6b20826e1f370908 100644 --- a/dygraph/ppdet/modeling/layers.py +++ b/dygraph/ppdet/modeling/layers.py @@ -27,9 +27,9 @@ from paddle.nn.initializer import Normal, Constant from paddle.regularizer import L2Decay from ppdet.core.workspace import register, serializable -from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target -from ppdet.py_op.post_process import bbox_post_process +from ppdet.modeling.bbox_utils import delta2bbox from . 
import ops + from paddle.vision.ops import DeformConv2D @@ -223,53 +223,6 @@ class AnchorGeneratorRPN(object): return anchor, var -@register -@serializable -class AnchorTargetGeneratorRPN(object): - def __init__(self, - batch_size_per_im=256, - straddle_thresh=0., - fg_fraction=0.5, - positive_overlap=0.7, - negative_overlap=0.3, - use_random=True): - super(AnchorTargetGeneratorRPN, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.straddle_thresh = straddle_thresh - self.fg_fraction = fg_fraction - self.positive_overlap = positive_overlap - self.negative_overlap = negative_overlap - self.use_random = use_random - - def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd, - im_info): - anchor_box = anchor_box.numpy() - gt_boxes = gt_boxes.numpy() - is_crowd = is_crowd.numpy() - im_info = im_info.numpy() - loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( - anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, - self.batch_size_per_im, self.positive_overlap, - self.negative_overlap, self.fg_fraction, self.use_random) - - loc_indexes = to_tensor(loc_indexes) - score_indexes = to_tensor(score_indexes) - tgt_labels = to_tensor(tgt_labels) - tgt_bboxes = to_tensor(tgt_bboxes) - bbox_inside_weights = to_tensor(bbox_inside_weights) - - loc_indexes.stop_gradient = True - score_indexes.stop_gradient = True - tgt_labels.stop_gradient = True - - cls_logits = paddle.reshape(x=cls_logits, shape=(-1, )) - bbox_pred = paddle.reshape(x=bbox_pred, shape=(-1, 4)) - pred_cls_logits = paddle.gather(cls_logits, score_indexes) - pred_bbox_pred = paddle.gather(bbox_pred, loc_indexes) - - return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights - - @register @serializable class AnchorGeneratorSSD(object): @@ -335,248 +288,52 @@ class AnchorGeneratorSSD(object): return boxes -@register -@serializable -class ProposalGenerator(object): - __append_doc__ = True - - def __init__(self, - train_pre_nms_top_n=12000, - train_post_nms_top_n=2000, - infer_pre_nms_top_n=6000, - infer_post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(ProposalGenerator, self).__init__() - self.train_pre_nms_top_n = train_pre_nms_top_n - self.train_post_nms_top_n = train_post_nms_top_n - self.infer_pre_nms_top_n = infer_pre_nms_top_n - self.infer_post_nms_top_n = infer_post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - def __call__(self, - scores, - bbox_deltas, - anchors, - variances, - im_shape, - is_train=False): - pre_nms_top_n = self.train_pre_nms_top_n if is_train else self.infer_pre_nms_top_n - post_nms_top_n = self.train_post_nms_top_n if is_train else self.infer_post_nms_top_n - # TODO delete im_info - if im_shape.shape[1] > 2: - import paddle.fluid as fluid - rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals( - scores, - bbox_deltas, - im_shape, - anchors, - variances, - pre_nms_top_n=pre_nms_top_n, - post_nms_top_n=post_nms_top_n, - nms_thresh=self.nms_thresh, - min_size=self.min_size, - eta=self.eta, - return_rois_num=True) - else: - rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals( - scores, - bbox_deltas, - im_shape, - anchors, - variances, - pre_nms_top_n=pre_nms_top_n, - post_nms_top_n=post_nms_top_n, - nms_thresh=self.nms_thresh, - min_size=self.min_size, - eta=self.eta, - return_rois_num=True) - return rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n - - -@register -@serializable -class 
ProposalTargetGenerator(object): - __shared__ = ['num_classes'] - - def __init__(self, - batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=[.5, ], - bg_thresh_hi=[.5, ], - bg_thresh_lo=[0., ], - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - num_classes=81, - use_random=True, - is_cls_agnostic=False): - super(ProposalTargetGenerator, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.num_classes = num_classes - self.use_random = use_random - self.is_cls_agnostic = is_cls_agnostic - - def __call__(self, - rpn_rois, - rpn_rois_num, - gt_classes, - is_crowd, - gt_boxes, - im_info, - stage=0, - max_overlap=None): - rpn_rois = rpn_rois.numpy() - rpn_rois_num = rpn_rois_num.numpy() - gt_classes = gt_classes.numpy() - gt_boxes = gt_boxes.numpy() - is_crowd = is_crowd.numpy() - im_info = im_info.numpy() - max_overlap = max_overlap if max_overlap is None else max_overlap.numpy( - ) - reg_weights = [i / (stage + 1) for i in self.bbox_reg_weights] - is_cascade = True if stage > 0 else False - num_classes = 2 if is_cascade else self.num_classes - outs = generate_proposal_target( - rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info, - self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], - self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], reg_weights, - num_classes, self.use_random, self.is_cls_agnostic, is_cascade, - max_overlap) - outs = [to_tensor(v) for v in outs] - for v in outs: - v.stop_gradient = True - return outs - - -@register -@serializable -class MaskTargetGenerator(object): - __shared__ = ['num_classes', 'mask_resolution'] - - def __init__(self, num_classes=81, mask_resolution=14): - super(MaskTargetGenerator, self).__init__() - self.num_classes = num_classes - self.mask_resolution = mask_resolution - - def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_num, - labels_int32): - im_info = im_info.numpy() - gt_classes = gt_classes.numpy() - is_crowd = is_crowd.numpy() - gt_segms = gt_segms.numpy() - rois = rois.numpy() - rois_num = rois_num.numpy() - labels_int32 = labels_int32.numpy() - outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, - rois, rois_num, labels_int32, - self.num_classes, self.mask_resolution) - - outs = [to_tensor(v) for v in outs] - for v in outs: - v.stop_gradient = True - return outs - - @register @serializable class RCNNBox(object): - __shared__ = ['num_classes', 'batch_size'] - def __init__(self, - num_classes=81, - batch_size=1, - prior_box_var=[0.1, 0.1, 0.2, 0.2], + prior_box_var=[10., 10., 5., 5.], code_type="decode_center_size", - box_normalized=False, - axis=1, - var_weight=1.): + box_normalized=False): super(RCNNBox, self).__init__() - self.num_classes = num_classes - self.batch_size = batch_size self.prior_box_var = prior_box_var self.code_type = code_type self.box_normalized = box_normalized - self.axis = axis - self.var_weight = var_weight def __call__(self, bbox_head_out, rois, im_shape, scale_factor): bbox_pred, cls_prob = bbox_head_out roi, rois_num = rois - origin_shape = im_shape / scale_factor + origin_shape = paddle.floor(im_shape / scale_factor + 0.5) scale_list = [] origin_shape_list = [] - for idx in range(self.batch_size): - scale = scale_factor[idx, :][0] + for idx, roi_per_im in enumerate(roi): rois_num_per_im = rois_num[idx] - expand_scale = paddle.expand(scale, [rois_num_per_im, 1]) - 
scale_list.append(expand_scale) - expand_im_shape = paddle.expand(origin_shape[idx, :], + expand_im_shape = paddle.expand(im_shape[idx, :], [rois_num_per_im, 2]) origin_shape_list.append(expand_im_shape) - scale = paddle.concat(scale_list) origin_shape = paddle.concat(origin_shape_list) - bbox = roi / scale - prior_box_var = [i / self.var_weight for i in self.prior_box_var] - bbox = ops.box_coder( - prior_box=bbox, - prior_box_var=prior_box_var, - target_box=bbox_pred, - code_type=self.code_type, - box_normalized=self.box_normalized, - axis=self.axis) - # TODO: Updata box_clip - origin_h = paddle.unsqueeze(origin_shape[:, 0] - 1, axis=1) - origin_w = paddle.unsqueeze(origin_shape[:, 1] - 1, axis=1) - zeros = paddle.zeros(paddle.shape(origin_h), 'float32') + # [N, C*4] + bbox = paddle.concat(roi) + bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var) + scores = cls_prob[:, :-1] + + # [N*C, 4] + + bbox_num_class = bbox.shape[1] // 4 + bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4]) + + origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1) + origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1) + zeros = paddle.zeros_like(origin_h) x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros) y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros) x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros) y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros) bbox = paddle.stack([x1, y1, x2, y2], axis=-1) - bboxes = (bbox, rois_num) - return bboxes, cls_prob - - -@register -@serializable -class DecodeClipNms(object): - __shared__ = ['num_classes'] - - def __init__( - self, - num_classes=81, - keep_top_k=100, - score_threshold=0.05, - nms_threshold=0.5, ): - super(DecodeClipNms, self).__init__() - self.num_classes = num_classes - self.keep_top_k = keep_top_k - self.score_threshold = score_threshold - self.nms_threshold = nms_threshold - - def __call__(self, bboxes, bbox_prob, bbox_delta, im_info): - bboxes_np = (i.numpy() for i in bboxes) - # bbox, bbox_num - outs = bbox_post_process(bboxes_np, - bbox_prob.numpy(), - bbox_delta.numpy(), - im_info.numpy(), self.keep_top_k, - self.score_threshold, self.nms_threshold, - self.num_classes) - outs = [to_tensor(v) for v in outs] - for v in outs: - v.stop_gradient = True - return outs + return bboxes, scores @register @@ -589,7 +346,6 @@ class MultiClassNMS(object): nms_threshold=.5, normalized=False, nms_eta=1.0, - background_label=0, return_rois_num=True): super(MultiClassNMS, self).__init__() self.score_threshold = score_threshold @@ -598,14 +354,28 @@ class MultiClassNMS(object): self.nms_threshold = nms_threshold self.normalized = normalized self.nms_eta = nms_eta - self.background_label = background_label self.return_rois_num = return_rois_num - def __call__(self, bboxes, score): + def __call__(self, bboxes, score, background_label=-1): + """ + bboxes (Tensor|List[Tensor]): 1. (Tensor) Predicted bboxes with shape + [N, M, 4], N is the batch size and M + is the number of bboxes + 2. (List[Tensor]) bboxes and bbox_num, + bboxes have shape of [M, C, 4], C + is the class number and bbox_num means + the number of bboxes of each batch with + shape [N,] + score (Tensor): Predicted scores with shape [N, C, M] or [M, C] + background_label (int): Ignore the background label; For example, RCNN + is num_classes and YOLO is -1. 
+ """ kwargs = self.__dict__.copy() if isinstance(bboxes, tuple): bboxes, bbox_num = bboxes kwargs.update({'rois_num': bbox_num}) + if background_label > -1: + kwargs.update({'background_label': background_label}) return ops.multiclass_nms(bboxes, score, **kwargs) diff --git a/dygraph/ppdet/modeling/mask.py b/dygraph/ppdet/modeling/mask.py deleted file mode 100644 index c4d1a0b6222d2c5e0d4d38501ceb18d586cb89a4..0000000000000000000000000000000000000000 --- a/dygraph/ppdet/modeling/mask.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np -from ppdet.core.workspace import register - - -@register -class Mask(object): - __inject__ = ['mask_target_generator'] - - def __init__(self, mask_target_generator): - super(Mask, self).__init__() - self.mask_target_generator = mask_target_generator - - def __call__(self, inputs, rois, targets): - mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois, - targets) - return mask_rois, rois_has_mask_int32 - - def generate_mask_target(self, inputs, rois, targets): - labels_int32 = targets['labels_int32'] - proposals, proposals_num = rois - mask_rois, mask_rois_num, self.rois_has_mask_int32, self.mask_int32 = self.mask_target_generator( - im_info=inputs['im_info'], - gt_classes=inputs['gt_class'], - is_crowd=inputs['is_crowd'], - gt_segms=inputs['gt_poly'], - rois=proposals, - rois_num=proposals_num, - labels_int32=labels_int32) - self.mask_rois = (mask_rois, mask_rois_num) - return self.mask_rois, self.rois_has_mask_int32 - - def get_targets(self): - return self.mask_int32 diff --git a/dygraph/ppdet/modeling/necks/fpn.py b/dygraph/ppdet/modeling/necks/fpn.py index 454d8cc31d677ab48342fa1d4de3a6be9e445aa8..0b817a2c11e9df49b5ee71d570edf7e6e69d9dee 100644 --- a/dygraph/ppdet/modeling/necks/fpn.py +++ b/dygraph/ppdet/modeling/necks/fpn.py @@ -21,6 +21,7 @@ from paddle.nn import Conv2D from paddle.nn.initializer import XavierUniform from paddle.regularizer import L2Decay from ppdet.core.workspace import register, serializable +from ..shape_spec import ShapeSpec @register @@ -29,18 +30,19 @@ class FPN(Layer): def __init__(self, in_channels, out_channel, - min_level=0, - max_level=4, - spatial_scale=[0.25, 0.125, 0.0625, 0.03125], + spatial_scales=[0.25, 0.125, 0.0625, 0.03125], has_extra_convs=False, + extra_stage=1, use_c5=True, relu_before_extra_convs=True): super(FPN, self).__init__() - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale + self.out_channel = out_channel + for s in range(extra_stage): + spatial_scales = spatial_scales + [spatial_scales[-1] / 2.] 
+ self.spatial_scales = spatial_scales self.has_extra_convs = has_extra_convs + self.extra_stage = extra_stage self.use_c5 = use_c5 self.relu_before_extra_convs = relu_before_extra_convs @@ -48,11 +50,7 @@ class FPN(Layer): self.fpn_convs = [] fan = out_channel * 3 * 3 - self.num_backbone_stages = len(spatial_scale) - self.num_outs = self.max_level - self.min_level + 1 - self.highest_backbone_level = self.min_level + self.num_backbone_stages - 1 - - for i in range(self.min_level, self.highest_backbone_level + 1): + for i in range(len(in_channels)): if i == 3: lateral_name = 'fpn_inner_res5_sum' else: @@ -65,9 +63,7 @@ class FPN(Layer): out_channels=out_channel, kernel_size=1, weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=in_c)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + initializer=XavierUniform(fan_out=in_c)))) self.lateral_convs.append(lateral) fpn_name = 'fpn_res{}_sum'.format(i + 2) @@ -79,17 +75,14 @@ class FPN(Layer): kernel_size=3, padding=1, weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + initializer=XavierUniform(fan_out=fan)))) self.fpn_convs.append(fpn_conv) # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) - if self.has_extra_convs and self.num_outs > self.num_backbone_stages: - for lvl in range(self.highest_backbone_level + 1, - self.max_level + 1): # P6 P7 ... - if lvl == self.highest_backbone_level + 1 and self.use_c5: - in_c = in_channels[self.highest_backbone_level] + if self.has_extra_convs: + for lvl in range(self.extra_stage): # P6 P7 ... + if lvl == 0 and self.use_c5: + in_c = in_channels[-1] else: in_c = out_channel extra_fpn_name = 'fpn_{}'.format(lvl + 2) @@ -102,51 +95,60 @@ class FPN(Layer): stride=2, padding=1, weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)), - bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + initializer=XavierUniform(fan_out=fan)))) self.fpn_convs.append(extra_fpn_conv) + @classmethod + def from_config(cls, cfg, input_shape): + return { + 'in_channels': [i.channels for i in input_shape], + 'spatial_scales': [1.0 / i.stride for i in input_shape], + } + def forward(self, body_feats): laterals = [] - used_backbone_levels = len(self.spatial_scale) - for i in range(used_backbone_levels): + num_levels = len(body_feats) + for i in range(num_levels): laterals.append(self.lateral_convs[i](body_feats[i])) - used_backbone_levels = len(self.spatial_scale) - for i in range(used_backbone_levels - 1): - idx = used_backbone_levels - 1 - i + for i in range(1, num_levels): + lvl = num_levels - i upsample = F.interpolate( - laterals[idx], + laterals[lvl], scale_factor=2., mode='nearest', ) - laterals[idx - 1] += upsample + laterals[lvl - 1] += upsample fpn_output = [] - for lvl in range(self.min_level, self.highest_backbone_level + 1): - i = lvl - self.min_level - fpn_output.append(self.fpn_convs[i](laterals[i])) + for lvl in range(num_levels): + fpn_output.append(self.fpn_convs[lvl](laterals[lvl])) - spatial_scales = self.spatial_scale - if self.num_outs > len(fpn_output): + if self.extra_stage > 0: # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) if not self.has_extra_convs: + assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs' fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2)) - spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5] # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) else: if 
self.use_c5: extra_source = body_feats[-1] else: extra_source = fpn_output[-1] - fpn_output.append(self.fpn_convs[used_backbone_levels]( - extra_source)) - spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5] - for i in range(used_backbone_levels + 1, self.num_outs): + fpn_output.append(self.fpn_convs[num_levels](extra_source)) + + for i in range(1, self.extra_stage): if self.relu_before_extra_convs: - fpn_output.append(self.fpn_convs[i](F.relu(fpn_output[ - -1]))) + fpn_output.append(self.fpn_convs[num_levels + i](F.relu( + fpn_output[-1]))) else: - fpn_output.append(self.fpn_convs[i](fpn_output[-1])) - spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5] - return fpn_output, spatial_scales + fpn_output.append(self.fpn_convs[num_levels + i]( + fpn_output[-1])) + return fpn_output + + @property + def out_shape(self): + return [ + ShapeSpec( + channels=self.out_channel, stride=1. / s) + for s in self.spatial_scales + ] diff --git a/dygraph/ppdet/modeling/ops.py b/dygraph/ppdet/modeling/ops.py index 76ba97954ef243c693f74c7605626d76a5997fc5..416277fe4c9fcca25c360eb1257618a097b5f0a8 100644 --- a/dygraph/ppdet/modeling/ops.py +++ b/dygraph/ppdet/modeling/ops.py @@ -32,7 +32,6 @@ __all__ = [ 'roi_pool', 'roi_align', 'prior_box', - 'anchor_generator', 'generate_proposals', 'iou_similarity', 'box_coder', @@ -169,6 +168,7 @@ def roi_align(input, spatial_scale=1.0, sampling_ratio=-1, rois_num=None, + aligned=True, name=None): """ @@ -239,7 +239,7 @@ def roi_align(input, align_out = core.ops.roi_align( input, rois, rois_num, "pooled_height", pooled_height, "pooled_width", pooled_width, "spatial_scale", spatial_scale, - "sampling_ratio", sampling_ratio) + "sampling_ratio", sampling_ratio) #, "aligned", aligned) return align_out else: @@ -264,7 +264,8 @@ def roi_align(input, "pooled_height": pooled_height, "pooled_width": pooled_width, "spatial_scale": spatial_scale, - "sampling_ratio": sampling_ratio + "sampling_ratio": sampling_ratio, + #"aligned": aligned, }) return align_out @@ -846,117 +847,6 @@ def prior_box(input, return box, var -@paddle.jit.not_to_static -def anchor_generator(input, - anchor_sizes=None, - aspect_ratios=None, - variance=[0.1, 0.1, 0.2, 0.2], - stride=None, - offset=0.5, - name=None): - """ - - This op generate anchors for Faster RCNN algorithm. - Each position of the input produce N anchors, N = - size(anchor_sizes) * size(aspect_ratios). The order of generated anchors - is firstly aspect_ratios loop then anchor_sizes loop. - - Args: - input(Tensor): 4-D Tensor with shape [N,C,H,W]. The input feature map. - anchor_sizes(float32|list|tuple, optional): The anchor sizes of generated - anchors, given in absolute pixels e.g. [64., 128., 256., 512.]. - For instance, the anchor size of 64 means the area of this anchor - equals to 64**2. None by default. - aspect_ratios(float32|list|tuple, optional): The height / width ratios - of generated anchors, e.g. [0.5, 1.0, 2.0]. None by default. - variance(list|tuple, optional): The variances to be used in box - regression deltas. The data type is float32, [0.1, 0.1, 0.2, 0.2] by - default. - stride(list|tuple, optional): The anchors stride across width and height. - The data type is float32. e.g. [16.0, 16.0]. None by default. - offset(float32, optional): Prior boxes center offset. 0.5 by default. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and None - by default. 
- - Returns: - Tuple: - - Anchors(Tensor): The output anchors with a layout of [H, W, num_anchors, 4]. - H is the height of input, W is the width of input, - num_anchors is the box count of each position. - Each anchor is in (xmin, ymin, xmax, ymax) format an unnormalized. - - Variances(Tensor): The expanded variances of anchors - with a layout of [H, W, num_priors, 4]. - H is the height of input, W is the width of input - num_anchors is the box count of each position. - Each variance is in (xcenter, ycenter, w, h) format. - - - Examples: - - .. code-block:: python - - import paddle - from ppdet.modeling import ops - - paddle.enable_static() - conv1 = paddle.static.data(name='input', shape=[None, 48, 16, 16], dtype='float32') - anchor, var = ops.anchor_generator( - input=conv1, - anchor_sizes=[64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - variance=[0.1, 0.1, 0.2, 0.2], - stride=[16.0, 16.0], - offset=0.5) - """ - helper = LayerHelper("anchor_generator", **locals()) - dtype = helper.input_dtype() - - def _is_list_or_tuple_(data): - return (isinstance(data, list) or isinstance(data, tuple)) - - if not _is_list_or_tuple_(anchor_sizes): - anchor_sizes = [anchor_sizes] - if not _is_list_or_tuple_(aspect_ratios): - aspect_ratios = [aspect_ratios] - if not (_is_list_or_tuple_(stride) and len(stride) == 2): - raise ValueError('stride should be a list or tuple ', - 'with length 2, (stride_width, stride_height).') - - anchor_sizes = list(map(float, anchor_sizes)) - aspect_ratios = list(map(float, aspect_ratios)) - stride = list(map(float, stride)) - - if in_dygraph_mode(): - attrs = ('anchor_sizes', anchor_sizes, 'aspect_ratios', aspect_ratios, - 'variances', variance, 'stride', stride, 'offset', offset) - anchor, var = core.ops.anchor_generator(input, *attrs) - return anchor, var - - else: - attrs = { - 'anchor_sizes': anchor_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'stride': stride, - 'offset': offset - } - - anchor = helper.create_variable_for_type_inference(dtype) - var = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="anchor_generator", - inputs={"Input": input}, - outputs={"Anchors": anchor, - "Variances": var}, - attrs=attrs, ) - anchor.stop_gradient = True - var.stop_gradient = True - return anchor, var - - @paddle.jit.not_to_static def multiclass_nms(bboxes, scores, @@ -966,7 +856,7 @@ def multiclass_nms(bboxes, nms_threshold=0.3, normalized=True, nms_eta=1., - background_label=0, + background_label=-1, return_index=False, return_rois_num=True, rois_num=None, diff --git a/dygraph/ppdet/modeling/post_process.py b/dygraph/ppdet/modeling/post_process.py index 3b75196f434ab8d03b7c2a60b8745e1cf6c1dcd6..a3619a3a8b5f263863ad02d4cbf53abad7aea146 100644 --- a/dygraph/ppdet/modeling/post_process.py +++ b/dygraph/ppdet/modeling/post_process.py @@ -3,47 +3,140 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F from ppdet.core.workspace import register -from ppdet.py_op.post_process import mask_post_process +from ppdet.modeling.bbox_utils import nonempty_bbox from . 
import ops @register class BBoxPostProcess(object): + __shared__ = ['num_classes'] __inject__ = ['decode', 'nms'] - def __init__(self, decode=None, nms=None): + def __init__(self, num_classes=80, decode=None, nms=None): super(BBoxPostProcess, self).__init__() + self.num_classes = num_classes self.decode = decode self.nms = nms - def __call__(self, head_out, rois, im_shape, scale_factor=None): + def __call__(self, head_out, rois, im_shape, scale_factor): + """ + Decode the bbox and do NMS if needed. + + Returns: + bbox_pred(Tensor): The prediction with shape [N, 6], including + labels, scores and bboxes. The bbox sizes correspond + to the input image and the bboxes may be used in + another branch. + bbox_num(Tensor): The number of predictions for each image in the + batch, with shape [B]. + """ if self.nms is not None: bboxes, score = self.decode(head_out, rois, im_shape, scale_factor) - bbox_pred, bbox_num, _ = self.nms(bboxes, score) + bbox_pred, bbox_num, _ = self.nms(bboxes, score, self.num_classes) else: bbox_pred, bbox_num = self.decode(head_out, rois, im_shape, scale_factor) return bbox_pred, bbox_num + def get_pred(self, bboxes, bbox_num, im_shape, scale_factor): + """ + Rescale, clip and filter the bbox from the output of NMS to + get the final prediction. + + Args: + bboxes(Tensor): The output of __call__ with shape [N, 6] + Returns: + bbox_pred(Tensor): The prediction with shape [N, 6], including + labels, scores and bboxes. The bbox sizes correspond + to the original image. + """ + assert bboxes.shape[0] > 0, 'There is no detection output' + origin_shape = paddle.floor(im_shape / scale_factor + 0.5) + + origin_shape_list = [] + scale_factor_list = [] + # scale_factor: scale_y, scale_x + for i in range(bbox_num.shape[0]): + expand_shape = paddle.expand(origin_shape[i:i + 1, :], + [bbox_num[i], 2]) + scale_y, scale_x = scale_factor[i] + scale = paddle.concat([scale_x, scale_y, scale_x, scale_y]) + expand_scale = paddle.expand(scale, [bbox_num[i], 4]) + origin_shape_list.append(expand_shape) + scale_factor_list.append(expand_scale) + + self.origin_shape_list = paddle.concat(origin_shape_list) + scale_factor_list = paddle.concat(scale_factor_list) + + # bboxes: [N, 6], label, score, bbox + pred_label = bboxes[:, 0:1] + pred_score = bboxes[:, 1:2] + pred_bbox = bboxes[:, 2:] + # rescale bbox to original image + scaled_bbox = pred_bbox / scale_factor_list + origin_h = self.origin_shape_list[:, 0] + origin_w = self.origin_shape_list[:, 1] + zeros = paddle.zeros_like(origin_h) + # clip bbox to [0, original_size] + x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros) + y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros) + x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros) + y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros) + pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1) + # filter empty bbox + keep_mask = nonempty_bbox(pred_bbox, return_mask=True) + keep_mask = paddle.unsqueeze(keep_mask, [1]) + pred_label = paddle.where(keep_mask, pred_label, + paddle.ones_like(pred_label) * -1) + pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1) + return pred_result + + def get_origin_shape(self): + return self.origin_shape_list + @register class MaskPostProcess(object): - __shared__ = ['mask_resolution'] - - def __init__(self, mask_resolution=28, binary_thresh=0.5): + def __init__(self, binary_thresh=0.5): super(MaskPostProcess, self).__init__() - 
self.mask_resolution = mask_resolution self.binary_thresh = binary_thresh - def __call__(self, bboxes, mask_head_out, im_shape, scale_factor=None): - # TODO: modify related ops for deploying - bboxes_np = (i.numpy() for i in bboxes) - mask = mask_post_process(bboxes_np, - mask_head_out.numpy(), - im_shape.numpy(), scale_factor[:, 0].numpy(), - self.mask_resolution, self.binary_thresh) - mask = {'mask': mask} - return mask + def paste_mask(self, masks, boxes, im_h, im_w): + # paste each mask on image + x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1) + masks = paddle.unsqueeze(masks, [0, 1]) + img_y = paddle.arange(0, im_h, dtype='float32') + 0.5 + img_x = paddle.arange(0, im_w, dtype='float32') + 0.5 + img_y = (img_y - y0) / (y1 - y0) * 2 - 1 + img_x = (img_x - x0) / (x1 - x0) * 2 - 1 + img_x = paddle.unsqueeze(img_x, [1]) + img_y = paddle.unsqueeze(img_y, [2]) + N = boxes.shape[0] + + gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]]) + gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]]) + grid = paddle.stack([gx, gy], axis=3) + img_masks = F.grid_sample(masks, grid, align_corners=False) + return img_masks[:, 0] + + def __call__(self, mask_out, bboxes, bbox_num, origin_shape): + """ + Paste the mask prediction to the original image. + """ + assert bboxes.shape[0] > 0, 'There is no detection output' + + num_mask = mask_out.shape[0] + # TODO: support bs > 1 + pred_result = paddle.zeros( + [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='bool') + # TODO: optimize chunk paste + for i in range(bboxes.shape[0]): + im_h, im_w = origin_shape[i] + pred_mask = self.paste_mask(mask_out[i], bboxes[i:i + 1, 2:], im_h, + im_w) + pred_mask = pred_mask >= self.binary_thresh + pred_result[i] = pred_mask + return pred_result @register diff --git a/dygraph/ppdet/modeling/proposal_generator/__init__.py b/dygraph/ppdet/modeling/proposal_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb518f2af6747ec25f3b5f7428891cbe89b95a8 --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/__init__.py @@ -0,0 +1,2 @@ +from . import rpn_head +from .rpn_head import * diff --git a/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb5bf0c709396bfd6b98e1d818d5c0ecf368305 --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py @@ -0,0 +1,108 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from ppdet.core.workspace import register +from .. 
import ops + + +@register +class AnchorGenerator(object): + def __init__(self, + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[16.0], + variance=[1.0, 1.0, 1.0, 1.0], + offset=0.): + super(AnchorGenerator, self).__init__() + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.strides = strides + self.variance = variance + self.cell_anchors = self._calculate_anchors(len(strides)) + self.offset = offset + + def _broadcast_params(self, params, num_features): + if not isinstance(params[0], (list, tuple)): # list[float] + return [params] * num_features + if len(params) == 1: + return list(params) * num_features + return params + + def generate_cell_anchors(self, sizes, aspect_ratios): + anchors = [] + for size in sizes: + area = size**2.0 + for aspect_ratio in aspect_ratios: + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 + anchors.append([x0, y0, x1, y1]) + return paddle.to_tensor(anchors, dtype='float32') + + def _calculate_anchors(self, num_features): + sizes = self._broadcast_params(self.anchor_sizes, num_features) + aspect_ratios = self._broadcast_params(self.aspect_ratios, num_features) + cell_anchors = [ + self.generate_cell_anchors(s, a) + for s, a in zip(sizes, aspect_ratios) + ] + return cell_anchors + + def _create_grid_offsets(self, size, stride, offset): + grid_height, grid_width = size + shifts_x = paddle.arange( + offset * stride, grid_width * stride, step=stride, dtype='float32') + shifts_y = paddle.arange( + offset * stride, grid_height * stride, step=stride, dtype='float32') + shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape([-1]) + shift_y = shift_y.reshape([-1]) + return shift_x, shift_y + + def _grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip(grid_sizes, self.strides, + self.cell_anchors): + shift_x, shift_y = self._create_grid_offsets(size, stride, + self.offset) + shifts = paddle.stack((shift_x, shift_y, shift_x, shift_y), axis=1) + + anchors.append((shifts.reshape([-1, 1, 4]) + base_anchors.reshape( + [1, -1, 4])).reshape([-1, 4])) + + return anchors + + def __call__(self, input): + grid_sizes = [feature_map.shape[-2:] for feature_map in input] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return anchors_over_all_feature_maps + + @property + def num_anchors(self): + """ + Returns: + int: number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios and 5 sizes, the number of anchors is 15. + For FPN models, `num_anchors` on every feature map is the same. + """ + return self.cell_anchors[0].shape[0] diff --git a/dygraph/ppdet/modeling/proposal_generator/proposal_generator.py b/dygraph/ppdet/modeling/proposal_generator/proposal_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..8a5df53255d080ec83d083bd0db72b41ca8700b4 --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/proposal_generator.py @@ -0,0 +1,59 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
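generate_cell_anchors above keeps each anchor's area fixed at size**2 while aspect_ratio sets the height-to-width ratio (h = aspect_ratio * w). A quick standalone check of those invariants, using the default sizes and ratios from the class above; pure Python for illustration, not ppdet code:

import math

anchor_sizes = [32, 64, 128, 256, 512]
aspect_ratios = [0.5, 1.0, 2.0]

anchors = []
for size in anchor_sizes:
    area = size ** 2.0
    for ratio in aspect_ratios:
        w = math.sqrt(area / ratio)
        h = ratio * w
        anchors.append((-w / 2.0, -h / 2.0, w / 2.0, h / 2.0))

x0, y0, x1, y1 = anchors[0]
assert abs((x1 - x0) * (y1 - y0) - 32 ** 2) < 1e-6  # area stays size**2
assert abs((y1 - y0) / (x1 - x0) - 0.5) < 1e-6      # h/w equals aspect_ratio
print(len(anchors))  # 15 anchors per pixel location, matching num_anchors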
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from ppdet.core.workspace import register, serializable +from .. import ops + + +@register +@serializable +class ProposalGenerator(object): + def __init__(self, + pre_nms_top_n=12000, + post_nms_top_n=2000, + nms_thresh=.5, + min_size=.1, + eta=1., + topk_after_collect=False): + super(ProposalGenerator, self).__init__() + self.pre_nms_top_n = pre_nms_top_n + self.post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + self.eta = eta + self.topk_after_collect = topk_after_collect + + def __call__(self, scores, bbox_deltas, anchors, im_shape): + + top_n = self.pre_nms_top_n if self.topk_after_collect else self.post_nms_top_n + variances = paddle.ones_like(anchors) + rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals( + scores, + bbox_deltas, + im_shape, + anchors, + variances, + pre_nms_top_n=self.pre_nms_top_n, + post_nms_top_n=top_n, + nms_thresh=self.nms_thresh, + min_size=self.min_size, + eta=self.eta, + return_rois_num=True) + return rpn_rois, rpn_rois_prob, rpn_rois_num, self.post_nms_top_n diff --git a/dygraph/ppdet/modeling/proposal_generator/rpn_head.py b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c7a615cfb74b3a47b6feb0fc299c553291f6fd8c --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py @@ -0,0 +1,225 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
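One detail of ProposalGenerator above is worth spelling out before the RPN head that uses it: the budget handed to ops.generate_proposals depends on topk_after_collect. When proposals are first collected across FPN levels, per-level NMS keeps the larger pre_nms_top_n and the final post_nms_top_n cut is applied later in the head. A toy sketch of that selection, with values mirroring the defaults in this patch:

# Sketch: which top-n the generator passes as post_nms_top_n per level.
def select_top_n(pre_nms_top_n, post_nms_top_n, topk_after_collect):
    # Keep the larger per-level budget when the final cut happens only
    # after proposals from all levels have been collected.
    return pre_nms_top_n if topk_after_collect else post_nms_top_n

assert select_top_n(12000, 2000, topk_after_collect=True) == 12000  # train
assert select_top_n(6000, 1000, topk_after_collect=False) == 1000   # test
print("per-level NMS budget resolved")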
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Normal +from paddle.regularizer import L2Decay + +from ppdet.core.workspace import register +from ppdet.modeling import ops + +from .anchor_generator import AnchorGenerator +from .target_layer import RPNTargetAssign +from .proposal_generator import ProposalGenerator + + +class RPNFeat(nn.Layer): + def __init__(self, feat_in=1024, feat_out=1024): + super(RPNFeat, self).__init__() + # rpn feat is shared with each level + self.rpn_conv = nn.Conv2D( + in_channels=feat_in, + out_channels=feat_out, + kernel_size=3, + padding=1, + weight_attr=paddle.ParamAttr(initializer=Normal( + mean=0., std=0.01))) + + def forward(self, feats): + rpn_feats = [] + for feat in feats: + rpn_feats.append(F.relu(self.rpn_conv(feat))) + return rpn_feats + + +@register +class RPNHead(nn.Layer): + def __init__(self, + anchor_generator=AnchorGenerator().__dict__, + rpn_target_assign=RPNTargetAssign().__dict__, + train_proposal=ProposalGenerator(12000, 2000).__dict__, + test_proposal=ProposalGenerator().__dict__, + in_channel=1024): + super(RPNHead, self).__init__() + self.anchor_generator = anchor_generator + self.rpn_target_assign = rpn_target_assign + self.train_proposal = train_proposal + self.test_proposal = test_proposal + if isinstance(anchor_generator, dict): + self.anchor_generator = AnchorGenerator(**anchor_generator) + if isinstance(rpn_target_assign, dict): + self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign) + if isinstance(train_proposal, dict): + self.train_proposal = ProposalGenerator(**train_proposal) + if isinstance(test_proposal, dict): + self.test_proposal = ProposalGenerator(**test_proposal) + + num_anchors = self.anchor_generator.num_anchors + self.rpn_feat = RPNFeat(in_channel, in_channel) + # rpn head is shared with each level + # rpn roi classification scores + self.rpn_rois_score = nn.Conv2D( + in_channels=in_channel, + out_channels=num_anchors, + kernel_size=1, + padding=0, + weight_attr=paddle.ParamAttr(initializer=Normal( + mean=0., std=0.01))) + + # rpn roi bbox regression deltas + self.rpn_rois_delta = nn.Conv2D( + in_channels=in_channel, + out_channels=4 * num_anchors, + kernel_size=1, + padding=0, + weight_attr=paddle.ParamAttr(initializer=Normal( + mean=0., std=0.01))) + + @classmethod + def from_config(cls, cfg, input_shape): + # FPN share same rpn head + if isinstance(input_shape, (list, tuple)): + input_shape = input_shape[0] + return {'in_channel': input_shape.channels} + + def forward(self, feats, inputs): + rpn_feats = self.rpn_feat(feats) + scores = [] + deltas = [] + + for rpn_feat in rpn_feats: + rrs = self.rpn_rois_score(rpn_feat) + rrd = self.rpn_rois_delta(rpn_feat) + scores.append(rrs) + deltas.append(rrd) + + anchors = self.anchor_generator(rpn_feats) + + rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs) + + if self.training: + loss = self.get_loss(scores, deltas, anchors, inputs) + return rois, rois_num, loss + else: + return rois, rois_num, None + + def _gen_proposal(self, scores, bbox_deltas, anchors, inputs): + """ + scores (list[Tensor]): Multi-level scores prediction + bbox_deltas (list[Tensor]): Multi-level deltas prediction + anchors (list[Tensor]): Multi-level anchors + inputs (dict): ground truth info + """ + prop_gen = self.train_proposal if self.training else self.test_proposal + im_shape = inputs['im_shape'] + batch_size = im_shape.shape[0] + rpn_rois_list = [[] for i in range(batch_size)] + rpn_prob_list = [[] for i in 
range(batch_size)] + rpn_rois_num_list = [[] for i in range(batch_size)] + # Generate proposals for each level and each image in the batch. + # Discard batch computation to avoid sorting bboxes across different images. + for rpn_score, rpn_delta, anchor in zip(scores, bbox_deltas, anchors): + for i in range(batch_size): + rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = prop_gen( + scores=rpn_score[i:i + 1], + bbox_deltas=rpn_delta[i:i + 1], + anchors=anchor, + im_shape=im_shape[i:i + 1]) + if rpn_rois.shape[0] > 0: + rpn_rois_list[i].append(rpn_rois) + rpn_prob_list[i].append(rpn_rois_prob) + rpn_rois_num_list[i].append(rpn_rois_num) + + # Collect multi-level proposals for each image + # Get 'topk' of them as final output + rois_collect = [] + rois_num_collect = [] + for i in range(batch_size): + if len(scores) > 1: + rpn_rois = paddle.concat(rpn_rois_list[i]) + rpn_prob = paddle.concat(rpn_prob_list[i]).flatten() + if rpn_prob.shape[0] > post_nms_top_n: + topk_prob, topk_inds = paddle.topk(rpn_prob, post_nms_top_n) + topk_rois = paddle.gather(rpn_rois, topk_inds) + else: + topk_rois = rpn_rois + topk_prob = rpn_prob + else: + topk_rois = rpn_rois_list[i][0] + topk_prob = rpn_prob_list[i][0].flatten() + rois_collect.append(topk_rois) + rois_num_collect.append(paddle.shape(topk_rois)[0]) + rois_num_collect = paddle.concat(rois_num_collect) + return rois_collect, rois_num_collect + + def get_loss(self, pred_scores, pred_deltas, anchors, inputs): + """ + pred_scores (list[Tensor]): Multi-level scores prediction + pred_deltas (list[Tensor]): Multi-level deltas prediction + anchors (list[Tensor]): Multi-level anchors + inputs (dict): ground truth info, including im, gt_bbox, gt_score + """ + anchors = [paddle.reshape(a, shape=(-1, 4)) for a in anchors] + anchors = paddle.concat(anchors) + + scores = [ + paddle.reshape( + paddle.transpose( + v, perm=[0, 2, 3, 1]), + shape=(v.shape[0], -1, 1)) for v in pred_scores + ] + scores = paddle.concat(scores, axis=1) + + deltas = [ + paddle.reshape( + paddle.transpose( + v, perm=[0, 2, 3, 1]), + shape=(v.shape[0], -1, 4)) for v in pred_deltas + ] + deltas = paddle.concat(deltas, axis=1) + + score_tgt, bbox_tgt, loc_tgt, norm = self.rpn_target_assign(inputs, + anchors) + + scores = paddle.reshape(x=scores, shape=(-1, )) + deltas = paddle.reshape(x=deltas, shape=(-1, 4)) + + score_tgt = paddle.concat(score_tgt) + score_tgt.stop_gradient = True + + pos_mask = score_tgt == 1 + pos_ind = paddle.nonzero(pos_mask) + + valid_mask = score_tgt >= 0 + valid_ind = paddle.nonzero(valid_mask) + + # cls loss + score_pred = paddle.gather(scores, valid_ind) + score_label = paddle.gather(score_tgt, valid_ind).cast('float32') + score_label.stop_gradient = True + loss_rpn_cls = F.binary_cross_entropy_with_logits( + logit=score_pred, label=score_label, reduction="sum") + + # reg loss + loc_pred = paddle.gather(deltas, pos_ind) + loc_tgt = paddle.concat(loc_tgt) + loc_tgt = paddle.gather(loc_tgt, pos_ind) + loc_tgt.stop_gradient = True + loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum() + return { + 'loss_rpn_cls': loss_rpn_cls / norm, + 'loss_rpn_reg': loss_rpn_reg / norm + } diff --git a/dygraph/ppdet/modeling/proposal_generator/target.py b/dygraph/ppdet/modeling/proposal_generator/target.py new file mode 100644 index 0000000000000000000000000000000000000000..aa2ddba1d8eb5ba9c4cdf4449921bbe96d445f97 --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/target.py @@ -0,0 +1,326 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six +import math +import numpy as np +import paddle +from ..bbox_utils import bbox2delta, bbox_overlaps +import copy + + +def rpn_anchor_target(anchors, + gt_boxes, + rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random=True, + batch_size=1, + weights=[1., 1., 1., 1.]): + tgt_labels = [] + tgt_bboxes = [] + + tgt_deltas = [] + for i in range(batch_size): + gt_bbox = gt_boxes[i] + + # Step1: match anchor and gt_bbox + matches, match_labels, matched_vals = label_box( + anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True) + # Step2: sample anchor + fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im, + rpn_fg_fraction, 0, use_random) + # Fill with the ignore label (-1), then set positive and negative labels + labels = paddle.full(match_labels.shape, -1, dtype='int32') + labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds)) + labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds)) + # Step3: make output + matched_gt_boxes = paddle.gather(gt_bbox, matches) + + tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights) + labels.stop_gradient = True + matched_gt_boxes.stop_gradient = True + tgt_delta.stop_gradient = True + tgt_labels.append(labels) + tgt_bboxes.append(matched_gt_boxes) + tgt_deltas.append(tgt_delta) + + return tgt_labels, tgt_bboxes, tgt_deltas + + +def label_box(anchors, gt_boxes, positive_overlap, negative_overlap, + allow_low_quality): + iou = bbox_overlaps(gt_boxes, anchors) + if iou.numel() == 0: + default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64') + default_match_labels = paddle.full((iou.shape[1], ), -1, dtype='int32') + return default_matches, default_match_labels + matched_vals, matches = paddle.topk(iou, k=1, axis=0) + match_labels = paddle.full(matches.shape, -1, dtype='int32') + + match_labels = paddle.where(matched_vals < negative_overlap, + paddle.zeros_like(match_labels), match_labels) + match_labels = paddle.where(matched_vals >= positive_overlap, + paddle.ones_like(match_labels), match_labels) + if allow_low_quality: + highest_quality_foreach_gt = iou.max(axis=1, keepdim=True) + pred_inds_with_highest_quality = ( + iou == highest_quality_foreach_gt).cast('int32').sum(0, + keepdim=True) + match_labels = paddle.where(pred_inds_with_highest_quality > 0, + paddle.ones_like(match_labels), + match_labels) + + matches = matches.flatten() + match_labels = match_labels.flatten() + matched_vals = matched_vals.flatten() + return matches, match_labels, matched_vals + + +def subsample_labels(labels, + num_samples, + fg_fraction, + bg_label=0, + use_random=True): + positive = paddle.nonzero( + paddle.logical_and(labels != -1, labels != bg_label)) + negative = paddle.nonzero(labels == bg_label) + + positive = positive.cast('int32').flatten() + negative = negative.cast('int32').flatten() + + fg_num = int(num_samples * fg_fraction) + fg_num = min(positive.numel(), fg_num) + bg_num = num_samples - 
fg_num + bg_num = min(negative.numel(), bg_num) + # randomly select positive and negative examples + fg_perm = paddle.randperm(positive.numel(), dtype='int32') + fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num]) + bg_perm = paddle.randperm(negative.numel(), dtype='int32') + bg_perm = paddle.slice(bg_perm, axes=[0], starts=[0], ends=[bg_num]) + if use_random: + fg_inds = paddle.gather(positive, fg_perm) + bg_inds = paddle.gather(negative, bg_perm) + else: + fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num]) + bg_inds = paddle.slice(negative, axes=[0], starts=[0], ends=[bg_num]) + return fg_inds, bg_inds + + +def filter_roi(rois, max_overlap): + ws = rois[:, 2] - rois[:, 0] + hs = rois[:, 3] - rois[:, 1] + # combine the conditions pairwise: in the three-argument form the third + # tensor would be taken as the `out` parameter of logical_and + valid_mask = paddle.logical_and(paddle.logical_and(ws > 0, hs > 0), max_overlap < 1) + keep = paddle.nonzero(valid_mask) + if keep.numel() > 0: + # nonzero on a 1-D mask yields indices in column 0 + return rois[keep[:, 0]] + return paddle.zeros((1, 4), dtype='float32') + + +def generate_proposal_target(rpn_rois, + gt_classes, + gt_boxes, + batch_size_per_im, + fg_fraction, + fg_thresh, + bg_thresh, + num_classes, + use_random=True, + is_cascade_rcnn=False, + max_overlaps=None): + + rois_with_gt = [] + tgt_labels = [] + tgt_bboxes = [] + sampled_max_overlaps = [] + tgt_gt_inds = [] + new_rois_num = [] + + for i, rpn_roi in enumerate(rpn_rois): + max_overlap = max_overlaps[i] if is_cascade_rcnn else None + gt_bbox = gt_boxes[i] + # use a new name so the per-image slice does not shadow the input list + gt_class = gt_classes[i] + if is_cascade_rcnn: + rpn_roi = filter_roi(rpn_roi, max_overlap) + bbox = paddle.concat([rpn_roi, gt_bbox]) + + # Step1: label bbox + matches, match_labels, matched_vals = label_box( + bbox, gt_bbox, fg_thresh, bg_thresh, False) + # Step2: sample bbox + sampled_inds, sampled_gt_classes = sample_bbox( + matches, match_labels, gt_class, batch_size_per_im, fg_fraction, + num_classes, use_random) + + # Step3: make output + rois_per_image = paddle.gather(bbox, sampled_inds) + sampled_gt_ind = paddle.gather(matches, sampled_inds) + sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind) + sampled_overlap = paddle.gather(matched_vals, sampled_inds) + + rois_per_image.stop_gradient = True + sampled_gt_ind.stop_gradient = True + sampled_bbox.stop_gradient = True + sampled_overlap.stop_gradient = True + + tgt_labels.append(sampled_gt_classes) + tgt_bboxes.append(sampled_bbox) + rois_with_gt.append(rois_per_image) + sampled_max_overlaps.append(sampled_overlap) + tgt_gt_inds.append(sampled_gt_ind) + new_rois_num.append(paddle.shape(sampled_inds)[0]) + new_rois_num = paddle.concat(new_rois_num) + return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num, sampled_max_overlaps + + +def sample_bbox( + matches, + match_labels, + gt_classes, + batch_size_per_im, + fg_fraction, + num_classes, + use_random=True, ): + gt_classes = paddle.gather(gt_classes, matches) + gt_classes = paddle.where(match_labels == 0, + paddle.ones_like(gt_classes) * num_classes, + gt_classes) + gt_classes = paddle.where(match_labels == -1, + paddle.ones_like(gt_classes) * -1, gt_classes) + rois_per_image = int(batch_size_per_im) + + fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction, + num_classes, use_random) + sampled_inds = paddle.concat([fg_inds, bg_inds]) + sampled_gt_classes = paddle.gather(gt_classes, sampled_inds) + return sampled_inds, sampled_gt_classes + + +def _strip_pad(gt_polys): + new_gt_polys = [] + for i in range(gt_polys.shape[0]): + gt_segs = [] + for j in range(gt_polys[i].shape[0]): + new_poly = [] + polys = gt_polys[i][j] + for ii in range(polys.shape[0]):
+ x, y = polys[ii] + if (x == -1 and y == -1): + continue + elif (x >= 0 or y >= 0): + new_poly.extend([x, y]) # array, one poly + if len(new_poly) > 6: + gt_segs.append(np.array(new_poly).astype('float64')) + new_gt_polys.append(gt_segs) + return new_gt_polys + + +def polygons_to_mask(polygons, height, width): + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + Returns: + ndarray: a bool mask of shape (height, width) + """ + import pycocotools.mask as mask_util + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box(poly, box, resolution): + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(poly) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + ratio_h = resolution / max(h, 0.1) + ratio_w = resolution / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_mask(polygons, resolution, resolution) + mask = paddle.to_tensor(mask, dtype='int32') + return mask + + +def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds, + num_classes, resolution): + mask_rois = [] + mask_rois_num = [] + tgt_masks = [] + tgt_classes = [] + mask_index = [] + tgt_weights = [] + for k in range(len(rois)): + has_fg = True + rois_per_im = rois[k] + gt_segms_per_im = gt_segms[k] + labels_per_im = labels_int32[k] + fg_inds = paddle.nonzero( + paddle.logical_and(labels_per_im != -1, labels_per_im != + num_classes)) + if fg_inds.numel() == 0: + has_fg = False + fg_inds = paddle.ones([1], dtype='int32') + + inds_per_im = sampled_gt_inds[k] + inds_per_im = paddle.gather(inds_per_im, fg_inds) + + gt_segms_per_im = paddle.gather(gt_segms_per_im, inds_per_im) + + fg_rois = paddle.gather(rois_per_im, fg_inds) + fg_classes = paddle.gather(labels_per_im, fg_inds) + fg_segms = paddle.gather(gt_segms_per_im, fg_inds) + weight = paddle.ones([fg_rois.shape[0]], dtype='float32') + if not has_fg: + weight = weight - 1 + # remove padding + gt_polys = fg_segms.numpy() + boxes = fg_rois.numpy() + new_gt_polys = _strip_pad(gt_polys) + results = [ + rasterize_polygons_within_box(poly, box, resolution) + for poly, box in zip(new_gt_polys, boxes) + ] + tgt_mask = paddle.stack(results) + tgt_mask.stop_gradient = True + fg_rois.stop_gradient = True + + mask_index.append(fg_inds) + mask_rois.append(fg_rois) + mask_rois_num.append(paddle.shape(fg_rois)[0]) + tgt_classes.append(fg_classes) + tgt_masks.append(tgt_mask) + tgt_weights.append(weight) + + mask_index = paddle.concat(mask_index) + mask_rois_num = paddle.concat(mask_rois_num) + tgt_classes = paddle.concat(tgt_classes, axis=0) + tgt_masks = paddle.concat(tgt_masks, axis=0) + tgt_weights = paddle.concat(tgt_weights, axis=0) + + return mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights diff --git a/dygraph/ppdet/modeling/proposal_generator/target_layer.py b/dygraph/ppdet/modeling/proposal_generator/target_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..ed6c08651a92bb9cf51386e1570f50c0af5cd84b --- /dev/null +++ b/dygraph/ppdet/modeling/proposal_generator/target_layer.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
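rasterize_polygons_within_box above shifts each polygon into the box frame, rescales it to resolution x resolution, and leaves the actual rasterization to pycocotools, exactly as polygons_to_mask does. A self-contained sketch of that last step, assuming pycocotools is installed; the square polygon is invented for illustration:

import numpy as np
import pycocotools.mask as mask_util

resolution = 14
# A centered square on the (resolution x resolution) canvas, given as a
# flattened [x0, y0, x1, y1, ...] COCO-style polygon.
square = np.array(
    [3.5, 3.5, 10.5, 3.5, 10.5, 10.5, 3.5, 10.5], dtype=np.float64)

rles = mask_util.frPyObjects([square], resolution, resolution)
rle = mask_util.merge(rles)
mask = mask_util.decode(rle).astype(bool)
print(mask.shape, int(mask.sum()))  # (14, 14) and roughly 7*7 pixels set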
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + +from ppdet.core.workspace import register, serializable + +from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target + + +@register +@serializable +class RPNTargetAssign(object): + def __init__(self, + batch_size_per_im=256, + fg_fraction=0.5, + positive_overlap=0.7, + negative_overlap=0.3, + use_random=True): + super(RPNTargetAssign, self).__init__() + self.batch_size_per_im = batch_size_per_im + self.fg_fraction = fg_fraction + self.positive_overlap = positive_overlap + self.negative_overlap = negative_overlap + self.use_random = use_random + + def __call__(self, inputs, anchors): + """ + inputs: ground-truth instances. + anchor_box (Tensor): [num_anchors, 4], num_anchors are all anchors in all feature maps. + """ + gt_boxes = inputs['gt_bbox'] + batch_size = gt_boxes.shape[0] + tgt_labels, tgt_bboxes, tgt_deltas = rpn_anchor_target( + anchors, gt_boxes, self.batch_size_per_im, self.positive_overlap, + self.negative_overlap, self.fg_fraction, self.use_random, + batch_size) + norm = self.batch_size_per_im * batch_size + + return tgt_labels, tgt_bboxes, tgt_deltas, norm + + +@register +class BBoxAssigner(object): + __shared__ = ['num_classes'] + + def __init__(self, + batch_size_per_im=512, + fg_fraction=.25, + fg_thresh=[.5, ], + bg_thresh=[.5, ], + use_random=True, + is_cls_agnostic=False, + num_classes=80): + super(BBoxAssigner, self).__init__() + self.batch_size_per_im = batch_size_per_im + self.fg_fraction = fg_fraction + self.fg_thresh = fg_thresh + self.bg_thresh = bg_thresh + self.use_random = use_random + self.is_cls_agnostic = is_cls_agnostic + self.num_classes = num_classes + + def __call__(self, + rpn_rois, + rpn_rois_num, + inputs, + stage=0, + max_overlap=None): + is_cascade = True if stage > 0 else False + gt_classes = inputs['gt_class'] + gt_boxes = inputs['gt_bbox'] + # rois, tgt_labels, tgt_bboxes, tgt_gt_inds + # new_rois_num, sampled_max_overlaps + outs = generate_proposal_target( + rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im, + self.fg_fraction, self.fg_thresh[stage], self.bg_thresh[stage], + self.num_classes, self.use_random, is_cascade, max_overlap) + rois = outs[0] + rois_num = outs[-2] + max_overlaps = outs[-1] + # tgt_labels, tgt_bboxes, tgt_gt_inds + targets = outs[1:4] + return rois, rois_num, max_overlaps, targets + + +@register +@serializable +class MaskAssigner(object): + __shared__ = ['num_classes', 'mask_resolution'] + + def __init__(self, num_classes=80, mask_resolution=14): + super(MaskAssigner, self).__init__() + self.num_classes = num_classes + self.mask_resolution = mask_resolution + + def __call__(self, rois, tgt_labels, tgt_gt_inds, inputs): + gt_segms = inputs['gt_poly'] + + outs = generate_mask_target(gt_segms, rois, tgt_labels, tgt_gt_inds, + self.num_classes, self.mask_resolution) + + # mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights + return outs diff --git a/dygraph/ppdet/modeling/shape_spec.py 
b/dygraph/ppdet/modeling/shape_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..78e4a3b00bee3b79e70ae75d1a22ce780bd9d7be --- /dev/null +++ b/dygraph/ppdet/modeling/shape_spec.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import namedtuple + + +class ShapeSpec( + namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): + """ + A simple structure that contains basic shape specification about a tensor. + It is often used as the auxiliary inputs/outputs of models, + to complement the lack of shape inference ability among paddle modules. + Attributes: + channels: + height: + width: + stride: + """ + + def __new__(cls, *, channels=None, height=None, width=None, stride=None): + return super().__new__(cls, channels, height, width, stride) diff --git a/dygraph/ppdet/py_op/__init__.py b/dygraph/ppdet/py_op/__init__.py index 3ceb1cef677e14329a66c492bd652a48632d4119..d48118906e65f80ebd15bdba7f5779a97b67bbdb 100644 --- a/dygraph/ppdet/py_op/__init__.py +++ b/dygraph/ppdet/py_op/__init__.py @@ -1,4 +1 @@ -from .bbox import * -from .mask import * -from .target import * from .post_process import * diff --git a/dygraph/ppdet/py_op/bbox.py b/dygraph/ppdet/py_op/bbox.py deleted file mode 100755 index dec8819169df8bfbc5561d960d5660e096e08795..0000000000000000000000000000000000000000 --- a/dygraph/ppdet/py_op/bbox.py +++ /dev/null @@ -1,262 +0,0 @@ -import numpy as np -from numba import jit - - -@jit -def bbox2delta(bboxes1, bboxes2, weights): - ex_w = bboxes1[:, 2] - bboxes1[:, 0] + 1 - ex_h = bboxes1[:, 3] - bboxes1[:, 1] + 1 - ex_ctr_x = bboxes1[:, 0] + 0.5 * ex_w - ex_ctr_y = bboxes1[:, 1] + 0.5 * ex_h - - gt_w = bboxes2[:, 2] - bboxes2[:, 0] + 1 - gt_h = bboxes2[:, 3] - bboxes2[:, 1] + 1 - gt_ctr_x = bboxes2[:, 0] + 0.5 * gt_w - gt_ctr_y = bboxes2[:, 1] + 0.5 * gt_h - - dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0] - dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1] - dw = (np.log(gt_w / ex_w)) / weights[2] - dh = (np.log(gt_h / ex_h)) / weights[3] - - deltas = np.vstack([dx, dy, dw, dh]).transpose() - return deltas - - -@jit -def delta2bbox(deltas, boxes, weights, bbox_clip=4.13): - if boxes.shape[0] == 0: - return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) - boxes = boxes.astype(deltas.dtype, copy=False) - - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - wx, wy, ww, wh = weights - dx = deltas[:, 0::4] * wx - dy = deltas[:, 1::4] * wy - dw = deltas[:, 2::4] * ww - dh = deltas[:, 3::4] * wh - - # Prevent sending too large values into np.exp() - dw = np.minimum(dw, bbox_clip) - dh = np.minimum(dh, bbox_clip) - - pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = np.exp(dw) * widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - - pred_boxes 
= np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 - # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 - - return pred_boxes - - -@jit -def expand_bbox(bboxes, scale): - w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5 - h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5 - x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5 - y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5 - - w_half *= scale - h_half *= scale - - bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32) - bboxes_exp[:, 0] = x_c - w_half - bboxes_exp[:, 2] = x_c + w_half - bboxes_exp[:, 1] = y_c - h_half - bboxes_exp[:, 3] = y_c + h_half - - return bboxes_exp - - -@jit -def clip_bbox(boxes, im_shape): - assert boxes.shape[1] % 4 == 0, \ - 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( - boxes.shape[1] - ) - # x1 >= 0 - boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) - # y1 >= 0 - boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) - return boxes - - -@jit -def bbox_overlaps(bboxes1, bboxes2): - w1 = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0) - h1 = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0) - w2 = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0) - h2 = np.maximum(bboxes2[:, 3] - bboxes2[:, 1] + 1, 0) - area1 = w1 * h1 - area2 = w2 * h2 - - boxes1_x1, boxes1_y1, boxes1_x2, boxes1_y2 = np.split(bboxes1, 4, axis=1) - boxes2_x1, boxes2_y1, boxes2_x2, boxes2_y2 = np.split(bboxes2, 4, axis=1) - - all_pairs_min_ymax = np.minimum(boxes1_y2, np.transpose(boxes2_y2)) - all_pairs_max_ymin = np.maximum(boxes1_y1, np.transpose(boxes2_y1)) - inter_h = np.maximum(all_pairs_min_ymax - all_pairs_max_ymin + 1, 0.) - all_pairs_min_xmax = np.minimum(boxes1_x2, np.transpose(boxes2_x2)) - all_pairs_max_xmin = np.maximum(boxes1_x1, np.transpose(boxes2_x1)) - inter_w = np.maximum(all_pairs_min_xmax - all_pairs_max_xmin + 1, 0.) 
- - inter_area = inter_w * inter_h - - union_area = np.expand_dims(area1, 1) + np.expand_dims(area2, 0) - overlaps = inter_area / (union_area - inter_area) - return overlaps - - -@jit -def nms(dets, thresh): - if dets.shape[0] == 0: - return [] - scores = dets[:, 0] - x1 = dets[:, 1] - y1 = dets[:, 2] - x2 = dets[:, 3] - y2 = dets[:, 4] - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - ndets = dets.shape[0] - suppressed = np.zeros((ndets), dtype=np.int) - - for _i in range(ndets): - i = order[_i] - if suppressed[i] == 1: - continue - ix1 = x1[i] - iy1 = y1[i] - ix2 = x2[i] - iy2 = y2[i] - iarea = areas[i] - for _j in range(_i + 1, ndets): - j = order[_j] - if suppressed[j] == 1: - continue - xx1 = max(ix1, x1[j]) - yy1 = max(iy1, y1[j]) - xx2 = min(ix2, x2[j]) - yy2 = min(iy2, y2[j]) - w = max(0.0, xx2 - xx1 + 1) - h = max(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (iarea + areas[j] - inter) - if ovr >= thresh: - suppressed[j] = 1 - - return np.where(suppressed == 0)[0] - - -def nms_with_decode(bboxes, - bbox_probs, - bbox_deltas, - im_info, - keep_top_k=100, - score_thresh=0.05, - nms_thresh=0.5, - class_nums=81, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]): - bboxes_num = [0, bboxes.shape[0]] - bboxes_v = np.array(bboxes) - bbox_probs_v = np.array(bbox_probs) - bbox_deltas_v = np.array(bbox_deltas) - variance_v = np.array(bbox_reg_weights) - im_results = [[] for _ in range(len(bboxes_num) - 1)] - new_bboxes_num = [0] - for i in range(len(bboxes_num) - 1): - start = bboxes_num[i] - end = bboxes_num[i + 1] - if start == end: - continue - - bbox_deltas_n = bbox_deltas_v[start:end, :] # box delta - rois_n = bboxes_v[start:end, :] # box - rois_n = rois_n / im_info[i][2] # scale - rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v) - rois_n = clip_bbox(rois_n, np.round(im_info[i][:2] / im_info[i][2])) - cls_boxes = [[] for _ in range(class_nums)] - scores_n = bbox_probs_v[start:end, :] - for j in range(1, class_nums): - inds = np.where(scores_n[:, j] > score_thresh)[0] - scores_j = scores_n[inds, j] - rois_j = rois_n[inds, j * 4:(j + 1) * 4] - dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( - np.float32, copy=False) - keep = nms(dets_j, nms_thresh) - nms_dets = dets_j[keep, :] - #add labels - label = np.array([j for _ in range(len(keep))]) - nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( - np.float32, copy=False) - cls_boxes[j] = nms_dets - - # Limit to max_per_image detections **over all classes** - image_scores = np.hstack( - [cls_boxes[j][:, 1] for j in range(1, class_nums)]) - if len(image_scores) > keep_top_k: - image_thresh = np.sort(image_scores)[-keep_top_k] - for j in range(1, class_nums): - keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] - cls_boxes[j] = cls_boxes[j][keep, :] - im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) - im_results[i] = im_results_n - new_bboxes_num.append(len(im_results_n) + new_bboxes_num[-1]) - labels = im_results_n[:, 0] - scores = im_results_n[:, 1] - boxes = im_results_n[:, 2:] - im_results = np.vstack([im_results[k] for k in range(len(bboxes_num) - 1)]) - new_bboxes_num = np.array(new_bboxes_num) - return new_bboxes_num, im_results - - -@jit -def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights): - assert bboxes1.shape[0] == bboxes2.shape[0] - assert bboxes1.shape[1] == 4 - assert bboxes2.shape[1] == 4 - - targets = np.zeros(bboxes1.shape) - bbox_reg_weights = np.asarray(bbox_reg_weights) - targets = bbox2delta( - bboxes1=bboxes1, 
bboxes2=bboxes2, weights=bbox_reg_weights) - - return np.hstack([labels[:, np.newaxis], targets]).astype( - np.float32, copy=False) - - -#@jit -def expand_bbox_targets(bbox_targets_input, - class_nums=81, - is_cls_agnostic=False): - class_labels = bbox_targets_input[:, 0] - fg_inds = np.where(class_labels > 0)[0] - if is_cls_agnostic: - class_nums = 2 - bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums)) - bbox_inside_weights = np.zeros(bbox_targets.shape) - for ind in fg_inds: - class_label = int(class_labels[ind]) if not is_cls_agnostic else 1 - start_ind = class_label * 4 - end_ind = class_label * 4 + 4 - bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:] - bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0) - return bbox_targets, bbox_inside_weights diff --git a/dygraph/ppdet/py_op/mask.py b/dygraph/ppdet/py_op/mask.py deleted file mode 100755 index 9de446f85bed09d7b02e2eb0f2ce08c61626d468..0000000000000000000000000000000000000000 --- a/dygraph/ppdet/py_op/mask.py +++ /dev/null @@ -1,202 +0,0 @@ -import six -import math -import numpy as np -from numba import jit - - -@jit -def decode(cnts, m): - v = 0 - mask = [] - for j in range(m): - for k in range(cnts[j]): - mask.append(v) - v = 1 - v - return mask - - -#@jit -def poly2mask(xy, k, h, w): - scale = 5. - x = [int(scale * p + 0.5) for p in xy[::2]] - x = x + [x[0]] - y = [int(scale * p + 0.5) for p in xy[1::2]] - y = y + [y[0]] - m = sum([ - int(max(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1]))) + int(1) - for j in range(k) - ]) - u, v = [], [] - for j in range(k): - xs = x[j] - xe = x[j + 1] - ys = y[j] - ye = y[j + 1] - dx = abs(xe - xs) - dy = abs(ys - ye) - flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye) - if flip: - xs, xe = xe, xs - ys, ye = ye, ys - - if dx >= dy: - if (dx == 0): - assert ye - ys == 0 - - s = 0 if dx == 0 else float(ye - ys) / dx - else: - if (dy == 0): - assert xe - xs == 0 - s = 0 if dy == 0 else float(xe - xs) / dy - - if dx >= dy: - ts = [dx - d if flip else d for d in range(dx + 1)] - u.extend([xs + t for t in ts]) - v.extend([int(ys + s * t + .5) for t in ts]) - else: - ts = [dy - d if flip else d for d in range(dy + 1)] - v.extend([t + ys for t in ts]) - u.extend([int(xs + s * t + .5) for t in ts]) - - k = len(u) - x = np.zeros((k), np.int) - y = np.zeros((k), np.int) - m = 0 - for j in six.moves.xrange(1, k): - if u[j] != u[j - 1]: - xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1)) - xd = (xd + .5) / scale - .5 - if (math.floor(xd) != xd or xd < 0 or xd > (w - 1)): - continue - yd = float(v[j] if v[j] < v[j - 1] else v[j - 1]) - yd = (yd + .5) / scale - .5 - yd = math.ceil(0 if yd < 0 else (h if yd > h else yd)) - x[m] = int(xd) - y[m] = int(yd) - m += 1 - k = m - a = [int(x[i] * h + y[i]) for i in range(k)] - a.append(h * w) - a.sort() - b = [0] + a[:len(a) - 1] - a = [c - d for (c, d) in zip(a, b)] - - k += 1 - b = [0 for i in range(k)] - b[0] = a[0] - m, j = 1, 1 - while (j < k): - if a[j] > 0: - b[m] = a[j] - m += 1 - j += 1 - else: - j += 1 - if (j < k): - b[m - 1] += a[j] - j += 1 - mask = decode(b, m) - mask = np.array(mask, dtype=np.int).reshape((w, h)) - mask = mask.transpose((1, 0)) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for j in range(len(polys)): - x_min, y_min = 10000000, 10000000 - x_max, y_max = 0, 0 - for i in range(len(polys[j])): - poly = polys[j][i] - x0 = min(min(p[::2]) for p in poly) 
- x_min = min(x0, x_min) - y0 = min(min(p[1::2]) for p in poly) - y_min = min(y0, y_min) - x1 = max(max(p[::2]) for p in poly) - x_max = max(x_max, x1) - y1 = max(max(p[1::2]) for p in poly) - y_max = max(y1, y_max) - boxes_from_polys[j, :] = [x_min, y_min, x_max, y_max] - return boxes_from_polys - - -@jit -def bbox_overlaps_mask(boxes, query_boxes): - N = boxes.shape[0] - K = query_boxes.shape[0] - overlaps = np.zeros((N, K), dtype=boxes.dtype) - for k in range(K): - box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) *\ - (query_boxes[k, 3] - query_boxes[k, 1] + 1) - for n in range(N): - iw = min(boxes[n, 2], query_boxes[k, 2]) -\ - max(boxes[n, 0], query_boxes[k, 0]) + 1 - if iw > 0: - ih = min(boxes[n, 3], query_boxes[k, 3]) -\ - max(boxes[n, 1], query_boxes[k, 1]) + 1 - if ih > 0: - ua = float( - (boxes[n, 2] - boxes[n, 0] + 1) *\ - (boxes[n, 3] - boxes[n, 1] + 1) +\ - box_area - iw * ih) - overlaps[n, k] = iw * ih / ua - return overlaps - - -@jit -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). - """ - w = box[2] - box[0] - h = box[3] - box[1] - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - i = 0 - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p = p.reshape(-1) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - mask = [] - for polygons in polygons_norm: - assert polygons.shape[0] % 2 == 0, polygons.shape - k = polygons.shape[0] // 2 - - one_msk = poly2mask(polygons, k, M, M) - mask.append(one_msk) - - mask = np.array(mask) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=0) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -#@jit -def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): - """Expand masks from shape (#masks, resolution ** 2) - to (#masks, #classes * resolution ** 2) to encode class - specific mask targets. 
- """ - assert masks.shape[0] == mask_class_labels.shape[0] - # Target values of -1 are "don't care" / ignore labels - mask_targets = -np.ones( - (masks.shape[0], num_classes * resolution**2), dtype=np.int32) - for i in range(masks.shape[0]): - cls = int(mask_class_labels[i]) - start = resolution**2 * cls - end = start + resolution**2 - # Ignore background instance - # (only happens when there is no fg samples in an image) - if cls > 0: - mask_targets[i, start:end] = masks[i, :] - - return mask_targets diff --git a/dygraph/ppdet/py_op/post_process.py b/dygraph/ppdet/py_op/post_process.py index a42946e8128ae18d3e0f9fdaaec9dca0f781f236..fcaeb2861066ee8bcbbc7b223a0268b056315f25 100755 --- a/dygraph/ppdet/py_op/post_process.py +++ b/dygraph/ppdet/py_op/post_process.py @@ -1,157 +1,26 @@ import six import os import numpy as np -from numba import jit -from .bbox import delta2bbox, clip_bbox, expand_bbox, nms -import pycocotools.mask as mask_util import cv2 -def bbox_post_process(bboxes, - bbox_prob, - bbox_deltas, - im_shape, - scale_factor, - keep_top_k=100, - score_thresh=0.05, - nms_thresh=0.5, - class_nums=81, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - with_background=True): - bbox, bbox_num = bboxes - new_bbox = [[] for _ in range(len(bbox_num))] - new_bbox_num = [] - st_num = 0 - end_num = 0 - for i in range(len(bbox_num)): - box_num = bbox_num[i] - end_num += box_num - - boxes = bbox[st_num:end_num, :] # bbox - boxes = boxes / scale_factor[i] # scale - bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta - bbox_delta = np.reshape(bbox_delta, (box_num, -1)) - # step1: decode - boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights) - - # step2: clip - boxes = clip_bbox(boxes, im_shape[i][:2] / scale_factor[i]) - # step3: nms - cls_boxes = [[] for _ in range(class_nums)] - scores_n = bbox_prob[st_num:end_num, :] - for j in range(with_background, class_nums): - inds = np.where(scores_n[:, j] > score_thresh)[0] - scores_j = scores_n[inds, j] - rois_j = boxes[inds, j * 4:(j + 1) * 4] - dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( - np.float32, copy=False) - keep = nms(dets_j, nms_thresh) - nms_dets = dets_j[keep, :] - #add labels - label = np.array([j for _ in range(len(keep))]) - nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( - np.float32, copy=False) - cls_boxes[j] = nms_dets - - st_num += box_num - - # Limit to max_per_image detections **over all classes** - image_scores = np.hstack( - [cls_boxes[j][:, 1] for j in range(with_background, class_nums)]) - if len(image_scores) > keep_top_k: - image_thresh = np.sort(image_scores)[-keep_top_k] - for j in range(with_background, class_nums): - keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] - cls_boxes[j] = cls_boxes[j][keep, :] - new_bbox_n = np.vstack( - [cls_boxes[j] for j in range(with_background, class_nums)]) - new_bbox[i] = new_bbox_n - new_bbox_num.append(len(new_bbox_n)) - new_bbox = np.vstack([new_bbox[k] for k in range(len(bbox_num))]) - new_bbox_num = np.array(new_bbox_num).astype('int32') - return new_bbox, new_bbox_num - - -@jit -def mask_post_process(det_res, - im_shape, - scale_factor, - resolution=14, - binary_thresh=0.5): - bbox = det_res['bbox'] - bbox_num = det_res['bbox_num'] - masks = det_res['mask'] - if masks.shape[0] == 0: - return masks - M = resolution - scale = (M + 2.0) / M - boxes = bbox[:, 2:] - labels = bbox[:, 0] - segms_results = [[] for _ in range(len(bbox_num))] - sum = 0 - st_num = 0 - end_num = 0 - for i in range(len(bbox_num)): - length = bbox_num[i] - end_num += 
length - cls_segms = [] - boxes_n = boxes[st_num:end_num] - labels_n = labels[st_num:end_num] - masks_n = masks[st_num:end_num] - - im_h = int(round(im_shape[i][0] / scale_factor[i, 0])) - im_w = int(round(im_shape[i][1] / scale_factor[i, 0])) - boxes_n = expand_bbox(boxes_n, scale) - boxes_n = boxes_n.astype(np.int32) - padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) - for j in range(len(boxes_n)): - class_id = int(labels_n[j]) - padded_mask[1:-1, 1:-1] = masks_n[j, class_id, :, :] - - ref_box = boxes_n[j, :] - w = ref_box[2] - ref_box[0] + 1 - h = ref_box[3] - ref_box[1] + 1 - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - mask = cv2.resize(padded_mask, (w, h)) - mask = np.array(mask > binary_thresh, dtype=np.uint8) - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - - x_0 = max(ref_box[0], 0) - x_1 = min(ref_box[2] + 1, im_w) - y_0 = max(ref_box[1], 0) - y_1 = min(ref_box[3] + 1, im_h) - im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[ - 1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])] - sum += im_mask.sum() - rle = mask_util.encode( - np.array( - im_mask[:, :, np.newaxis], order='F'))[0] - cls_segms.append(rle) - segms_results[i] = np.array(cls_segms)[:, np.newaxis] - st_num += length - segms_results = np.vstack([segms_results[k] for k in range(len(bbox_num))]) - bboxes = np.hstack([segms_results, bbox]) - return bboxes[:, :3] - - -@jit -def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map): +def get_det_res(bboxes, scores, labels, bbox_nums, image_id, + label_to_cat_id_map): det_res = [] k = 0 for i in range(len(bbox_nums)): cur_image_id = int(image_id[i][0]) det_nums = bbox_nums[i] for j in range(det_nums): - dt = bboxes[k] + box = bboxes[k] + score = float(scores[k]) + label = int(labels[k]) + if label < 0: continue k = k + 1 - num_id, score, xmin, ymin, xmax, ymax = dt.tolist() - if num_id < 0: - continue - category_id = num_id_to_cat_id_map[num_id] - w = xmax - xmin + 1 - h = ymax - ymin + 1 + xmin, ymin, xmax, ymax = box.tolist() + category_id = label_to_cat_id_map[label] + w = xmax - xmin + h = ymax - ymin bbox = [xmin, ymin, w, h] dt_res = { 'image_id': cur_image_id, @@ -163,25 +32,30 @@ def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map): return det_res -@jit -def get_seg_res(masks, mask_nums, image_id, num_id_to_cat_id_map): +def get_seg_res(masks, scores, labels, mask_nums, image_id, + label_to_cat_id_map): + import pycocotools.mask as mask_util seg_res = [] k = 0 for i in range(len(mask_nums)): cur_image_id = int(image_id[i][0]) det_nums = mask_nums[i] for j in range(det_nums): - dt = masks[k] + mask = masks[k] + score = float(scores[k]) + label = int(labels[k]) k = k + 1 - sg, num_id, score = dt.tolist() - cat_id = num_id_to_cat_id_map[num_id] + cat_id = label_to_cat_id_map[label] + rle = mask_util.encode( + np.array( + mask[:, :, None], order="F", dtype="uint8"))[0] if six.PY3: - if 'counts' in sg: - sg['counts'] = sg['counts'].decode("utf8") + if 'counts' in rle: + rle['counts'] = rle['counts'].decode("utf8") sg_res = { 'image_id': cur_image_id, 'category_id': cat_id, - 'segmentation': sg, + 'segmentation': rle, 'score': score } seg_res.append(sg_res) diff --git a/dygraph/ppdet/py_op/target.py b/dygraph/ppdet/py_op/target.py deleted file mode 100755 index 6278adf283ffc4c6a65690b6f4d420b7ab4a2913..0000000000000000000000000000000000000000 --- a/dygraph/ppdet/py_op/target.py +++ /dev/null @@ -1,409 +0,0 @@ -import six -import math -import numpy as np -from numba import jit -from .bbox import * -from .mask import * - - 
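Note: with this patch the COCO serializers take flat per-detection arrays (`bboxes`, `scores`, `labels`) plus per-image counts instead of the old packed 6-column array, and labels are 0-based. A minimal sketch of the new calling convention; the shapes, values, and `label_to_cat_id_map` below are illustrative assumptions (in practice the map is built from the dataset's category list), not code from this patch:

    import numpy as np
    from ppdet.py_op.post_process import get_det_res  # assumes dygraph/ is on PYTHONPATH

    # two images: two detections for the first, one for the second
    bboxes = np.array([[10., 20., 110., 220.],
                       [15., 25., 50., 60.],
                       [30., 40., 70., 90.]], dtype='float32')  # [x0, y0, x1, y1]
    scores = np.array([0.9, 0.6, 0.8], dtype='float32')
    labels = np.array([0, 2, 1], dtype='int32')   # 0-based; -1 marks a padded "no detection" row
    bbox_nums = np.array([2, 1], dtype='int32')   # detections per image
    image_id = np.array([[42], [43]], dtype='int32')
    label_to_cat_id_map = {0: 1, 1: 2, 2: 3}      # illustrative 0-based label -> COCO category id

    det_res = get_det_res(bboxes, scores, labels, bbox_nums, image_id,
                          label_to_cat_id_map)
    # -> [{'image_id': 42, 'category_id': 1, 'bbox': [10.0, 20.0, 100.0, 200.0], 'score': 0.9}, ...]

Widths and heights are now `xmax - xmin` with no legacy `+ 1` pixel offset, matching the COCO box convention.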
diff --git a/dygraph/ppdet/py_op/target.py b/dygraph/ppdet/py_op/target.py
deleted file mode 100755
index 6278adf283ffc4c6a65690b6f4d420b7ab4a2913..0000000000000000000000000000000000000000
--- a/dygraph/ppdet/py_op/target.py
+++ /dev/null
@@ -1,409 +0,0 @@
-import six
-import math
-import numpy as np
-from numba import jit
-from .bbox import *
-from .mask import *
-
-
-@jit
-def generate_rpn_anchor_target(anchors,
-                               gt_boxes,
-                               is_crowd,
-                               im_info,
-                               rpn_straddle_thresh,
-                               rpn_batch_size_per_im,
-                               rpn_positive_overlap,
-                               rpn_negative_overlap,
-                               rpn_fg_fraction,
-                               use_random=True,
-                               anchor_reg_weights=[1., 1., 1., 1.]):
-    anchor_num = anchors.shape[0]
-    batch_size = gt_boxes.shape[0]
-
-    loc_indexes = []
-    cls_indexes = []
-    tgt_labels = []
-    tgt_deltas = []
-    anchor_inside_weights = []
-
-    for i in range(batch_size):
-
-        # TODO: move anchor filter into anchor generator
-        im_height = im_info[i][0]
-        im_width = im_info[i][1]
-        im_scale = im_info[i][2]
-        if rpn_straddle_thresh >= 0:
-            anchor_inds = np.where((anchors[:, 0] >= -rpn_straddle_thresh) & (
-                anchors[:, 1] >= -rpn_straddle_thresh) & (
-                    anchors[:, 2] < im_width + rpn_straddle_thresh) & (
-                        anchors[:, 3] < im_height + rpn_straddle_thresh))[0]
-            anchor = anchors[anchor_inds, :]
-        else:
-            anchor_inds = np.arange(anchors.shape[0])
-            anchor = anchors
-
-        gt_bbox = gt_boxes[i] * im_scale
-        is_crowd_slice = is_crowd[i]
-        not_crowd_inds = np.where(is_crowd_slice == 0)[0]
-        gt_bbox = gt_bbox[not_crowd_inds]
-
-        # Step1: match anchor and gt_bbox
-        anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels = label_anchor(anchor,
-                                                                       gt_bbox)
-
-        # Step2: sample anchor
-        fg_inds, bg_inds, fg_fake_inds, fake_num = sample_anchor(
-            anchor_gt_bbox_iou, labels, rpn_positive_overlap,
-            rpn_negative_overlap, rpn_batch_size_per_im, rpn_fg_fraction,
-            use_random)
-
-        # Step3: make output
-        loc_inds = np.hstack([fg_fake_inds, fg_inds])
-        cls_inds = np.hstack([fg_inds, bg_inds])
-
-        sampled_labels = labels[cls_inds]
-
-        sampled_anchors = anchor[loc_inds]
-        sampled_gt_boxes = gt_bbox[anchor_gt_bbox_inds[loc_inds]]
-        sampled_deltas = bbox2delta(sampled_anchors, sampled_gt_boxes,
-                                    anchor_reg_weights)
-
-        anchor_inside_weight = np.zeros((len(loc_inds), 4), dtype=np.float32)
-        anchor_inside_weight[fake_num:, :] = 1
-
-        loc_indexes.append(anchor_inds[loc_inds] + i * anchor_num)
-        cls_indexes.append(anchor_inds[cls_inds] + i * anchor_num)
-        tgt_labels.append(sampled_labels)
-        tgt_deltas.append(sampled_deltas)
-        anchor_inside_weights.append(anchor_inside_weight)
-
-    loc_indexes = np.concatenate(loc_indexes)
-    cls_indexes = np.concatenate(cls_indexes)
-    tgt_labels = np.concatenate(tgt_labels).astype('float32')
-    tgt_deltas = np.vstack(tgt_deltas).astype('float32')
-    anchor_inside_weights = np.vstack(anchor_inside_weights)
-
-    return loc_indexes, cls_indexes, tgt_labels, tgt_deltas, anchor_inside_weights
-
-
-@jit
-def label_anchor(anchors, gt_boxes):
-    iou = bbox_overlaps(anchors, gt_boxes)
-    # every gt's anchor's index
-    gt_bbox_anchor_inds = iou.argmax(axis=0)
-    gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])]
-    gt_bbox_anchor_iou_inds = np.where(iou == gt_bbox_anchor_iou)[0]
-
-    # every anchor's gt bbox's index
-    anchor_gt_bbox_inds = iou.argmax(axis=1)
-    anchor_gt_bbox_iou = iou[np.arange(iou.shape[0]), anchor_gt_bbox_inds]
-
-    labels = np.ones((iou.shape[0], ), dtype=np.int32) * -1
-    labels[gt_bbox_anchor_iou_inds] = 1
-
-    return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels
-
-
-@jit
-def sample_anchor(anchor_gt_bbox_iou,
-                  labels,
-                  rpn_positive_overlap,
-                  rpn_negative_overlap,
-                  rpn_batch_size_per_im,
-                  rpn_fg_fraction,
-                  use_random=True):
-
-    labels[anchor_gt_bbox_iou >= rpn_positive_overlap] = 1
-    num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
-    fg_inds = np.where(labels == 1)[0]
-    if len(fg_inds) > num_fg and use_random:
-        disable_inds = np.random.choice(
-            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
-    else:
-        disable_inds = fg_inds[num_fg:]
-    labels[disable_inds] = -1
-    fg_inds = np.where(labels == 1)[0]
-
-    num_bg = rpn_batch_size_per_im - np.sum(labels == 1)
-    bg_inds = np.where(anchor_gt_bbox_iou < rpn_negative_overlap)[0]
-    if len(bg_inds) > num_bg and use_random:
-        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
-    else:
-        enable_inds = bg_inds[:num_bg]
-
-    fg_fake_inds = np.array([], np.int32)
-    fg_value = np.array([fg_inds[0]], np.int32)
-    fake_num = 0
-    for bg_id in enable_inds:
-        if bg_id in fg_inds:
-            fake_num += 1
-            fg_fake_inds = np.hstack([fg_fake_inds, fg_value])
-    labels[enable_inds] = 0
-
-    fg_inds = np.where(labels == 1)[0]
-    bg_inds = np.where(labels == 0)[0]
-
-    return fg_inds, bg_inds, fg_fake_inds, fake_num
-
-
-@jit
-def filter_roi(rois, max_overlap):
-    ws = rois[:, 2] - rois[:, 0] + 1
-    hs = rois[:, 3] - rois[:, 1] + 1
-    keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1))[0]
-    if len(keep) > 0:
-        return rois[keep, :]
-    return np.zeros((1, 4)).astype('float32')
-
-
-@jit
-def generate_proposal_target(rpn_rois,
-                             rpn_rois_num,
-                             gt_classes,
-                             is_crowd,
-                             gt_boxes,
-                             im_info,
-                             batch_size_per_im,
-                             fg_fraction,
-                             fg_thresh,
-                             bg_thresh_hi,
-                             bg_thresh_lo,
-                             bbox_reg_weights,
-                             class_nums=81,
-                             use_random=True,
-                             is_cls_agnostic=False,
-                             is_cascade_rcnn=False,
-                             max_overlaps=None):
-
-    rois = []
-    tgt_labels = []
-    tgt_deltas = []
-    rois_inside_weights = []
-    rois_outside_weights = []
-    sampled_max_overlaps = []
-    new_rois_num = []
-    st_num = 0
-    end_num = 0
-    for im_i in range(len(rpn_rois_num)):
-        length = rpn_rois_num[im_i]
-        end_num += length
-        rpn_roi = rpn_rois[st_num:end_num]
-        max_overlap = max_overlaps[st_num:end_num] if is_cascade_rcnn else None
-        im_scale = im_info[im_i][2]
-        rpn_roi = rpn_roi / im_scale
-        gt_bbox = gt_boxes[im_i]
-
-        if is_cascade_rcnn:
-            rpn_roi = filter_roi(rpn_roi, max_overlap)
-        bbox = np.vstack([gt_bbox, rpn_roi]).astype('float32')
-
-        # Step1: label bbox
-        roi_gt_bbox_inds, labels, max_overlap = label_bbox(
-            bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i])
-
-        # Step2: sample bbox
-        fg_inds, bg_inds, fg_nums = sample_bbox(
-            max_overlap, batch_size_per_im, fg_fraction, fg_thresh,
-            bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
-            use_random, is_cls_agnostic, is_cascade_rcnn)
-
-        # Step3: make output
-        sampled_inds = np.append(fg_inds, bg_inds)
-
-        sampled_labels = labels[sampled_inds]
-        sampled_labels[fg_nums:] = 0
-
-        sampled_boxes = bbox[sampled_inds]
-        sampled_max_overlap = max_overlap[sampled_inds]
-        sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]]
-        sampled_gt_boxes[fg_nums:, :] = 0
-        sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes,
-                                              sampled_labels, bbox_reg_weights)
-        sampled_deltas[fg_nums:, :] = 0
-        sampled_deltas, bbox_inside_weights = expand_bbox_targets(
-            sampled_deltas, class_nums, is_cls_agnostic)
-        bbox_outside_weights = np.array(
-            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
-
-        roi = sampled_boxes * im_scale
-        st_num += length
-
-        rois.append(roi)
-        new_rois_num.append(roi.shape[0])
-        tgt_labels.append(sampled_labels)
-        tgt_deltas.append(sampled_deltas)
-        rois_inside_weights.append(bbox_inside_weights)
-        rois_outside_weights.append(bbox_outside_weights)
-        sampled_max_overlaps.append(sampled_max_overlap)
-
-    rois = np.concatenate(rois, axis=0).astype(np.float32)
-    tgt_labels = np.concatenate(
-        tgt_labels, axis=0).astype(np.int32).reshape(-1, 1)
-    tgt_deltas = np.concatenate(tgt_deltas, axis=0).astype(np.float32)
-    rois_inside_weights = np.concatenate(
-        rois_inside_weights, axis=0).astype(np.float32)
-    rois_outside_weights = np.concatenate(
-        rois_outside_weights, axis=0).astype(np.float32)
-    sampled_max_overlaps = np.concatenate(
-        sampled_max_overlaps, axis=0).astype(np.float32)
-    new_rois_num = np.asarray(new_rois_num, np.int32)
-    return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num, sampled_max_overlaps
-
-
-@jit
-def label_bbox(boxes, gt_boxes, gt_classes, is_crowd, class_nums=81):
-
-    iou = bbox_overlaps(boxes, gt_boxes)
-
-    # every roi's gt box's index
-    roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
-    roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums), dtype=np.float32)
-
-    iou_argmax = iou.argmax(axis=1)
-    iou_max = iou.max(axis=1)
-    overlapped_boxes_ind = np.where(iou_max > 0)[0].astype('int32')
-    roi_gt_bbox_inds[overlapped_boxes_ind] = iou_argmax[overlapped_boxes_ind]
-    overlapped_boxes_gt_classes = gt_classes[iou_argmax[
-        overlapped_boxes_ind]].astype('int32')
-    roi_gt_bbox_iou[overlapped_boxes_ind,
-                    overlapped_boxes_gt_classes] = iou_max[overlapped_boxes_ind]
-
-    crowd_ind = np.where(is_crowd)[0]
-    roi_gt_bbox_iou[crowd_ind] = -1
-
-    max_overlap = roi_gt_bbox_iou.max(axis=1)
-    labels = roi_gt_bbox_iou.argmax(axis=1)
-
-    return roi_gt_bbox_inds, labels, max_overlap
-
-
-@jit
-def sample_bbox(max_overlap,
-                batch_size_per_im,
-                fg_fraction,
-                fg_thresh,
-                bg_thresh_hi,
-                bg_thresh_lo,
-                bbox_reg_weights,
-                class_nums,
-                use_random=True,
-                is_cls_agnostic=False,
-                is_cascade_rcnn=False):
-
-    rois_per_image = int(batch_size_per_im)
-    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
-
-    if is_cascade_rcnn:
-        fg_inds = np.where(max_overlap >= fg_thresh)[0]
-        bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
-                                                           bg_thresh_lo))[0]
-        fg_nums = fg_inds.shape[0]
-        bg_nums = bg_inds.shape[0]
-    else:
-        # sampe fg
-        fg_inds = np.where(max_overlap >= fg_thresh)[0]
-        fg_nums = np.minimum(fg_rois_per_im, fg_inds.shape[0])
-        if (fg_inds.shape[0] > fg_nums) and use_random:
-            fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
-        fg_inds = fg_inds[:fg_nums]
-
-        # sample bg
-        bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
-                                                           bg_thresh_lo))[0]
-        bg_nums = rois_per_image - fg_nums
-        bg_nums = np.minimum(bg_nums, bg_inds.shape[0])
-        if (bg_inds.shape[0] > bg_nums) and use_random:
-            bg_inds = np.random.choice(bg_inds, size=bg_nums, replace=False)
-        bg_inds = bg_inds[:bg_nums]
-
-    return fg_inds, bg_inds, fg_nums
-
-
-@jit
-def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
-                         rois_num, labels_int32, num_classes, resolution):
-    mask_rois = []
-    mask_rois_num = []
-    rois_has_mask_int32 = []
-    mask_int32 = []
-    st_num = 0
-    end_num = 0
-    for k in range(len(rois_num)):
-        length = rois_num[k]
-        end_num += length
-
-        # remove padding
-        gt_polys = gt_segms[k]
-        new_gt_polys = []
-        for i in range(gt_polys.shape[0]):
-            gt_segs = []
-            for j in range(gt_polys[i].shape[0]):
-                new_poly = []
-                polys = gt_polys[i][j]
-                for ii in range(polys.shape[0]):
-                    x, y = polys[ii]
-                    if (x == -1 and y == -1):
-                        continue
-                    elif (x >= 0 or y >= 0):
-                        new_poly.append([x, y])  # array, one poly
-                if len(new_poly) > 0:
-                    gt_segs.append(new_poly)
-            new_gt_polys.append(gt_segs)
-        im_scale = im_info[k][2]
-        boxes = rois[st_num:end_num] / im_scale
-
-        bbox_fg, bbox_has_mask, masks = sample_mask(
-            boxes, new_gt_polys, labels_int32[st_num:end_num], gt_classes[k],
-            is_crowd[k], num_classes, resolution)
-
-        st_num += length
-
-        mask_rois.append(bbox_fg * im_scale)
-        mask_rois_num.append(len(bbox_fg))
-        rois_has_mask_int32.append(bbox_has_mask)
-        mask_int32.append(masks)
-
-    mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32)
-    mask_rois_num = np.array(mask_rois_num).astype(np.int32)
-    rois_has_mask_int32 = np.concatenate(
-        rois_has_mask_int32, axis=0).astype(np.int32)
-    mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32)
-
-    return mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32
-
-
-@jit
-def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes,
-                resolution):
-
-    gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
-    _gt_polys = [gt_polys[i] for i in gt_polys_inds]
-    boxes_from_polys = polys_to_boxes(_gt_polys)
-
-    fg_inds = np.where(label_int32 > 0)[0]
-    bbox_has_mask = fg_inds.copy()
-
-    if fg_inds.shape[0] > 0:
-        labels_fg = label_int32[fg_inds]
-        masks_fg = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32)
-        bbox_fg = boxes[fg_inds]
-
-        iou = bbox_overlaps_mask(bbox_fg, boxes_from_polys)
-        fg_polys_inds = np.argmax(iou, axis=1)
-
-        for i in range(bbox_fg.shape[0]):
-            poly_gt = _gt_polys[fg_polys_inds[i]]
-            roi_fg = bbox_fg[i]
-
-            mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution)
-            mask = np.array(mask > 0, dtype=np.int32)
-            masks_fg[i, :] = np.reshape(mask, resolution**2)
-    else:
-        bg_inds = np.where(label_int32 == 0)[0]
-        bbox_fg = boxes[bg_inds[0]].reshape((1, -1))
-        masks_fg = -np.ones((1, resolution**2), dtype=np.int32)
-        labels_fg = np.zeros((1, ))
-        bbox_has_mask = np.append(bbox_has_mask, 0)
-    masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes)
-    return bbox_fg, bbox_has_mask, masks
diff --git a/dygraph/ppdet/utils/check.py b/dygraph/ppdet/utils/check.py
index 4fc1d14f144cc36c5cfb15cd1b3c77a55ed2a943..3a3bcf795875ed543ae04fc3a3cb93233b735eb3 100644
--- a/dygraph/ppdet/utils/check.py
+++ b/dygraph/ppdet/utils/check.py
@@ -90,11 +90,4 @@ def check_config(cfg):
     if 'log_iter' not in cfg:
         cfg.log_iter = 20
 
-    logger.debug("The 'num_classes'(number of classes) you set is {}, " \
-                 "and 'with_background' in 'dataset' sets {}.\n" \
-                 "So please note the actual number of categories is {}."
-                 .format(cfg.num_classes, cfg.with_background,
-                         cfg.num_classes + 1))
-    cfg.num_classes = cfg.num_classes + int(cfg.with_background)
-
     return cfg
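Note on the check.py hunk: `check_config` no longer folds the background class into `num_classes`, so configs now carry the number of foreground categories directly (e.g. 80 for COCO instead of 81) and heads emit 0-based labels, with -1 reserved for padded empty outputs. A small sketch of the convention shift; the variable names are illustrative only, not part of this patch:

    # old convention: with_background=True meant class id 0 was background,
    # and check_config silently did cfg.num_classes += 1
    # new convention: num_classes counts foreground categories only
    coco_num_classes = 80                              # value a COCO config now carries
    padded_label = -1                                  # rows the serializers skip (see get_det_res above)
    old_effective_classes = coco_num_classes + 1       # what the deleted code used to produce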