未验证 提交 944eee5c 编写于 作者: W wangguanzhong 提交者: GitHub

refactor rcnn, test=dygraph (#2114)

* refactor rcnn, test=dygraph

* add mask_rcnn, test=dygraph

* add Faster RCNN & Faster FPN, test=dygraph

* update according to the review, test=dygraph
上级 9a2651fe
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: { }
- RandomFlipImage: {prob: 0.5}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true}
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false }
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false }
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -2,12 +2,7 @@ architecture: FasterRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True
# Model Achitecture
FasterRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
# model feat info flow
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
......@@ -24,70 +19,50 @@ ResNet:
num_stages: 3
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 1024
feat_out: 1024
anchor_per_position: 15
rpn_channel: 1024
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
anchor_sizes: [32, 64, 128, 256, 512]
strides: [16]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 12000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 6000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
pre_nms_top_n: 12000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
BBoxHead:
bbox_feat:
name: BBoxFeat
head: Res5Head
roi_extractor:
name: RoIAlign
resolution: 14
sampling_ratio: 0
start_level: 0
end_level: 0
head_feat:
name: Res5Head
feat_in: 1024
feat_out: 512
aligned: True
bbox_assigner: BBoxAssigner
with_pool: true
in_feat: 2048
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: [0.5,]
fg_thresh: [0.5,]
fg_fraction: 0.25
use_random: True
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
normalized: true
......@@ -2,12 +2,7 @@ architecture: FasterRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True
# Model Achitecture
FasterRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
......@@ -25,72 +20,56 @@ ResNet:
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
bbox_feat:
name: BBoxFeat
head: TwoFCHead
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: [0.5,]
fg_thresh: [0.5,]
fg_fraction: 0.25
use_random: True
TwoFCHead:
mlp_dim: 1024
in_feat: 1024
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
normalized: true
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: { }
- RandomFlipImage: {prob: 0.5}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1, use_padded_im_info: false, pad_gt: true}
- PadBatchOp: {pad_to_stride: -1., pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: { pad_to_stride: -1, pad_gt: false }
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: { pad_to_stride: -1, pad_gt: false }
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -7,8 +7,8 @@ LearningRate:
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
......
......@@ -2,22 +2,21 @@ worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomFlipImage: {prob: 0.5, is_mask_flip: true}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5, is_mask_flip: true}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true}
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
......@@ -30,8 +29,8 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
......
......@@ -2,13 +2,7 @@ architecture: MaskRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True
# Model Achitecture
MaskRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
......@@ -26,88 +20,69 @@ ResNet:
num_stages: 3
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 1024
feat_out: 1024
anchor_per_position: 15
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
anchor_sizes: [32, 64, 128, 256, 512]
strides: [16]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 12000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 6000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
pre_nms_top_n: 6000
post_nms_top_n: 1000
BBoxHead:
head: Res5Head
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
with_pool: true
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
bg_thresh: [0.5,]
fg_thresh: [0.5,]
fg_fraction: 0.25
use_random: True
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor: RoIAlign
head_feat:
name: Res5Head
feat_in: 1024
feat_out: 512
with_pool: true
in_feat: 2048
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
normalized: true
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 14
RoIAlign:
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
start_level: 0
end_level: 0
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 0
feat_in: 2048
feat_out: 256
mask_roi_extractor: RoIAlign
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: true
feat_in: 256
MaskFeat:
out_channels: 256
MaskPostProcess:
MaskAssigner:
mask_resolution: 14
MaskPostProcess:
binary_thresh: 0.5
......@@ -2,13 +2,7 @@ architecture: MaskRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True
# Model Achitecture
MaskRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
......@@ -27,94 +21,73 @@ ResNet:
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
bbox_feat:
name: BBoxFeat
head: TwoFCHead
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: [0.5,]
fg_thresh: [0.5,]
fg_fraction: 0.25
use_random: True
TwoFCHead:
mlp_dim: 1024
in_feat: 1024
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 28
normalized: true
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 4
feat_in: 256
feat_out: 256
mask_roi_extractor:
name: RoIAlign
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 2
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
feat_in: 256
MaskFeat:
num_convs: 4
out_channels: 256
MaskPostProcess:
MaskAssigner:
mask_resolution: 28
MaskPostProcess:
binary_thresh: 0.5
......@@ -2,12 +2,12 @@ worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomFlipImage: {prob: 0.5, is_mask_flip: true}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5, is_mask_flip: true}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1., use_padded_im_info: false, pad_gt: true}
- PadBatchOp: {pad_to_stride: -1., pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -16,8 +16,8 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
......@@ -30,8 +30,8 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
......
......@@ -7,8 +7,8 @@ LearningRate:
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
start_factor: 0.001
steps: 1000
OptimizerBuilder:
optimizer:
......
......@@ -76,13 +76,6 @@ class ConfigPaser {
std::cerr << "Please set draw_threshold." << std::endl;
return false;
}
// Get with_background
if (config["with_background"].IsDefined()) {
with_background_ = config["with_background"].as<bool>();
} else {
std::cerr << "Please set with_background." << std::endl;
return false;
}
// Get Preprocess for preprocessing
if (config["Preprocess"].IsDefined()) {
preprocess_info_ = config["Preprocess"];
......@@ -111,7 +104,6 @@ class ConfigPaser {
float draw_threshold_;
std::string arch_;
int min_subgraph_size_;
bool with_background_;
YAML::Node preprocess_info_;
std::vector<std::string> label_list_;
std::vector<int> image_shape_;
......
......@@ -99,19 +99,6 @@ def _load_config_with_base(file_path):
return file_cfg
WITHOUT_BACKGROUND_ARCHS = ['YOLOv3', 'FCOS', 'TTFNet']
def _parse_with_background():
arch = global_config.architecture
with_background = arch not in WITHOUT_BACKGROUND_ARCHS
global_config['with_background'] = with_background
global_config['TrainReader']['with_background'] = with_background
global_config['EvalReader']['with_background'] = with_background
global_config['TestReader']['with_background'] = with_background
global_config['num_classes'] += with_background
def load_config(file_path):
"""
Load config from file.
......@@ -129,9 +116,6 @@ def load_config(file_path):
cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0]
merge_config(cfg)
# parse config from merged config
_parse_with_background()
return global_config
......@@ -166,7 +150,7 @@ def merge_config(config, another_cfg=None):
Returns: global config
"""
global global_config
dct = another_cfg if another_cfg is not None else global_config
dct = another_cfg or global_config
return dict_merge(dct, config)
......@@ -231,16 +215,13 @@ def create(cls_or_name, **kwargs):
isinstance(global_config[name], SchemaDict), \
"the module {} is not registered".format(name)
config = global_config[name]
config.update(kwargs)
config.validate()
cls = getattr(config.pymodule, name)
kwargs = {}
kwargs.update(global_config[name])
cls_kwargs = {}
cls_kwargs.update(global_config[name])
# parse `shared` annoation of registered modules
if getattr(config, 'shared', None):
for k in config.shared:
target_key = config[k]
shared_conf = config.schema[k].default
assert isinstance(shared_conf, SharedConfig)
......@@ -249,11 +230,14 @@ def create(cls_or_name, **kwargs):
continue # value is given for the module
elif shared_conf.key in global_config:
# `key` is present in config
kwargs[k] = global_config[shared_conf.key]
cls_kwargs[k] = global_config[shared_conf.key]
else:
kwargs[k] = shared_conf.default_value
cls_kwargs[k] = shared_conf.default_value
# parse `inject` annoation of registered modules
if getattr(cls, 'from_config', None):
cls_kwargs.update(cls.from_config(config, **kwargs))
if getattr(config, 'inject', None):
for k in config.inject:
target_key = config[k]
......@@ -275,18 +259,18 @@ def create(cls_or_name, **kwargs):
continue
target[i] = v
if isinstance(target, SchemaDict):
kwargs[k] = create(inject_name)
cls_kwargs[k] = create(inject_name)
elif isinstance(target_key, str):
if target_key not in global_config:
raise ValueError("Missing injection config:", target_key)
target = global_config[target_key]
if isinstance(target, SchemaDict):
kwargs[k] = create(target_key)
cls_kwargs[k] = create(target_key)
elif hasattr(target, '__dict__'): # serialized object
kwargs[k] = target
cls_kwargs[k] = target
else:
raise ValueError("Unsupported injection type:", target_key)
# prevent modification of global config values of reference types
# (e.g., list, dict) from within the created module instances
#kwargs = copy.deepcopy(kwargs)
return cls(**kwargs)
return cls(**cls_kwargs)
......@@ -37,7 +37,7 @@ MAIN_PID = os.getpid()
class Compose(object):
def __init__(self, transforms, num_classes=81):
def __init__(self, transforms, num_classes=80):
self.transforms = transforms
self.transforms_cls = []
for t in self.transforms:
......@@ -61,7 +61,7 @@ class Compose(object):
class BatchCompose(Compose):
def __init__(self, transforms, num_classes=81):
def __init__(self, transforms, num_classes=80):
super(BatchCompose, self).__init__(transforms, num_classes)
self.output_fields = mp.Manager().list([])
self.lock = mp.Lock()
......@@ -119,8 +119,7 @@ class BaseDataLoader(object):
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81,
with_background=True,
num_classes=80,
**kwargs):
# sample transform
self._sample_transforms = Compose(
......@@ -132,7 +131,6 @@ class BaseDataLoader(object):
self.batch_size = batch_size
self.shuffle = shuffle
self.drop_last = drop_last
self.with_background = with_background
self.kwargs = kwargs
def __call__(self,
......@@ -142,7 +140,7 @@ class BaseDataLoader(object):
return_list=False,
use_prefetch=True):
self.dataset = dataset
self.dataset.parse_dataset(self.with_background)
self.dataset.parse_dataset()
# get data
self.dataset.set_transform(self._sample_transforms)
# set kwargs
......@@ -204,13 +202,11 @@ class TrainReader(BaseDataLoader):
shuffle=True,
drop_last=True,
drop_empty=True,
num_classes=81,
with_background=True,
num_classes=80,
**kwargs):
super(TrainReader, self).__init__(inputs_def, sample_transforms,
batch_transforms, batch_size, shuffle,
drop_last, drop_empty, num_classes,
with_background, **kwargs)
super(TrainReader, self).__init__(
inputs_def, sample_transforms, batch_transforms, batch_size,
shuffle, drop_last, drop_empty, num_classes, **kwargs)
@register
......@@ -223,13 +219,11 @@ class EvalReader(BaseDataLoader):
shuffle=False,
drop_last=True,
drop_empty=True,
num_classes=81,
with_background=True,
num_classes=80,
**kwargs):
super(EvalReader, self).__init__(inputs_def, sample_transforms,
batch_transforms, batch_size, shuffle,
drop_last, drop_empty, num_classes,
with_background, **kwargs)
super(EvalReader, self).__init__(
inputs_def, sample_transforms, batch_transforms, batch_size,
shuffle, drop_last, drop_empty, num_classes, **kwargs)
@register
......@@ -242,10 +236,8 @@ class TestReader(BaseDataLoader):
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81,
with_background=True,
num_classes=80,
**kwargs):
super(TestReader, self).__init__(inputs_def, sample_transforms,
batch_transforms, batch_size, shuffle,
drop_last, drop_empty, num_classes,
with_background, **kwargs)
super(TestReader, self).__init__(
inputs_def, sample_transforms, batch_transforms, batch_size,
shuffle, drop_last, drop_empty, num_classes, **kwargs)
......@@ -35,7 +35,7 @@ class COCODataSet(DetDataset):
self.load_image_only = False
self.load_semantic = False
def parse_dataset(self, with_background=True):
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
......@@ -44,16 +44,12 @@ class COCODataSet(DetDataset):
from pycocotools.coco import COCO
coco = COCO(anno_path)
img_ids = coco.getImgIds()
img_ids.sort()
cat_ids = coco.getCatIds()
records = []
ct = 0
# when with_background = True, mapping category to classid, like:
# background:0, first_class:1, second_class:2, ...
catid2clsid = dict({
catid: i + int(with_background)
for i, catid in enumerate(cat_ids)
})
catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in catid2clsid.items()
......@@ -95,13 +91,14 @@ class COCODataSet(DetDataset):
else:
if not any(np.array(inst['bbox'])):
continue
x, y, box_w, box_h = inst['bbox']
x1 = max(0, x)
y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2]
x1, y1, box_w, box_h = inst['bbox']
x2 = x1 + box_w
y2 = y1 + box_h
eps = 1e-5
if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
inst['clean_bbox'] = [
round(float(x), 3) for x in [x1, y1, x2, y2]
]
bboxes.append(inst)
else:
logger.warning(
......
......@@ -78,7 +78,7 @@ class DetDataset(Dataset):
def set_epoch(self, epoch_id):
self._epoch = epoch_id
def parse_dataset(self, with_background=True):
def parse_dataset(self, ):
raise NotImplemented(
"Need to implement parse_dataset method of Dataset")
......@@ -115,13 +115,17 @@ class ImageFolder(DetDataset):
sample_num=-1,
use_default_label=None,
**kwargs):
super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path,
sample_num, use_default_label)
super(ImageFolder, self).__init__(
dataset_dir,
image_dir,
anno_path,
sample_num=sample_num,
use_default_label=use_default_label)
self._imid2path = {}
self.roidbs = None
self.sample_num = sample_num
def parse_dataset(self, with_background=True):
def parse_dataset(self, ):
if not self.roidbs:
self.roidbs = self._load_images()
......
......@@ -58,14 +58,11 @@ class VOCDataSet(DetDataset):
sample_num=sample_num)
self.label_list = label_list
def parse_dataset(self, with_background=True):
def parse_dataset(self, ):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
# mapping category name to class id
# if with_background is True:
# background:0, first_class:1, second_class:2, ...
# if with_background is False:
# first_class:0, second_class:1, ...
records = []
ct = 0
......@@ -76,12 +73,12 @@ class VOCDataSet(DetDataset):
raise ValueError("label_list {} does not exists".format(
label_path))
with open(label_path, 'r') as fr:
label_id = int(with_background)
label_id = 0
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label(with_background)
cname2cid = pascalvoc_label()
with open(anno_path, 'r') as fr:
while True:
......@@ -175,29 +172,27 @@ class VOCDataSet(DetDataset):
return os.path.join(self.dataset_dir, self.label_list)
def pascalvoc_label(with_background=True):
def pascalvoc_label():
labels_map = {
'aeroplane': 1,
'bicycle': 2,
'bird': 3,
'boat': 4,
'bottle': 5,
'bus': 6,
'car': 7,
'cat': 8,
'chair': 9,
'cow': 10,
'diningtable': 11,
'dog': 12,
'horse': 13,
'motorbike': 14,
'person': 15,
'pottedplant': 16,
'sheep': 17,
'sofa': 18,
'train': 19,
'tvmonitor': 20
'aeroplane': 0,
'bicycle': 1,
'bird': 2,
'boat': 3,
'bottle': 4,
'bus': 5,
'car': 6,
'cat': 7,
'chair': 8,
'cow': 9,
'diningtable': 10,
'dog': 11,
'horse': 12,
'motorbike': 13,
'person': 14,
'pottedplant': 15,
'sheep': 16,
'sofa': 17,
'train': 18,
'tvmonitor': 19
}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
......@@ -52,7 +52,7 @@ class WIDERFaceDataSet(DataSet):
self.cname2cid = None
self.with_lmk = with_lmk
def load_roidb_and_cname2cid(self, with_background=True):
def load_roidb_and_cname2cid(self, ):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
......@@ -61,7 +61,7 @@ class WIDERFaceDataSet(DataSet):
records = []
ct = 0
file_lists = self._load_file_list(txt_file)
cname2cid = widerface_label(with_background)
cname2cid = widerface_label()
for item in file_lists:
im_fname = item[0]
......@@ -159,8 +159,6 @@ class WIDERFaceDataSet(DataSet):
return list(file_dict.values())
def widerface_label(with_background=True):
labels_map = {'face': 1}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
def widerface_label():
labels_map = {'face': 0}
return labels_map
......@@ -500,7 +500,7 @@ class RandomFlipOp(BaseOperator):
def apply_segm(self, segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
flipped_poly[0::2] = width - np.array(poly[0::2])
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
......@@ -526,7 +526,7 @@ class RandomFlipOp(BaseOperator):
for i in range(gt_keypoint.shape[1]):
if i % 2 == 0:
old_x = gt_keypoint[:, i].copy()
gt_keypoint[:, i] = width - old_x - 1
gt_keypoint[:, i] = width - old_x
return gt_keypoint
def apply_image(self, image):
......@@ -535,8 +535,8 @@ class RandomFlipOp(BaseOperator):
def apply_bbox(self, bbox, width):
oldx1 = bbox[:, 0].copy()
oldx2 = bbox[:, 2].copy()
bbox[:, 0] = width - oldx2 - 1
bbox[:, 2] = width - oldx1 - 1
bbox[:, 0] = width - oldx2
bbox[:, 2] = width - oldx1
return bbox
def apply(self, sample, context=None):
......@@ -601,6 +601,7 @@ class ResizeOp(BaseOperator):
def apply_image(self, image, scale):
im_scale_x, im_scale_y = scale
return cv2.resize(
image,
None,
......@@ -614,8 +615,8 @@ class ResizeOp(BaseOperator):
resize_w, resize_h = size
bbox[:, 0::2] *= im_scale_x
bbox[:, 1::2] *= im_scale_y
bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w - 1)
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h - 1)
bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
return bbox
def apply_segm(self, segms, im_size, scale):
......
......@@ -43,9 +43,8 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
preprocess_list = []
anno_file = dataset_cfg.get_anno()
with_background = reader_cfg['with_background']
clsid2catid, catid2name = get_categories(metric, anno_file, with_background)
clsid2catid, catid2name = get_categories(metric, anno_file)
label_list = [str(cat) for cat in catid2name.values()]
......@@ -73,7 +72,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
})
break
return with_background, preprocess_list, label_list, image_shape
return preprocess_list, label_list, image_shape
def _dump_infer_config(config, path, image_shape, model):
......@@ -102,7 +101,7 @@ def _dump_infer_config(config, path, image_shape, model):
if 'mask_post_process' in model.__dict__ and model.__dict__[
'mask_post_process']:
infer_cfg['mask_resolution'] = model.mask_post_process.mask_resolution
infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[
infer_cfg['Preprocess'], infer_cfg[
'label_list'], image_shape = _parse_reader(
config['TestReader'], config['TestDataset'], config['metric'],
infer_cfg['arch'], image_shape)
......
......@@ -97,19 +97,11 @@ class Trainer(object):
def _init_metrics(self):
if self.mode == 'eval':
if self.cfg.metric == 'COCO':
mask_resolution = self.model.mask_post_process.mask_resolution if getattr(
self.model, 'mask_post_process', None) else None
self._metrics = [
COCOMetric(
anno_file=self.dataset.get_anno(),
with_background=self.cfg.with_background,
mask_resolution=mask_resolution)
]
self._metrics = [COCOMetric(anno_file=self.dataset.get_anno())]
elif self.cfg.metric == 'VOC':
self._metrics = [
VOCMetric(
anno_file=self.dataset.get_anno(),
with_background=self.cfg.with_background,
class_num=self.cfg.num_classes,
map_type=self.cfg.map_type)
]
......@@ -240,9 +232,7 @@ class Trainer(object):
imid2path = self.dataset.get_imid2path()
anno_file = self.dataset.get_anno()
with_background = self.cfg.with_background
clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file,
with_background)
clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)
# Run Infer
for step_id, data in enumerate(loader):
......@@ -255,14 +245,6 @@ class Trainer(object):
for key, value in outs.items():
outs[key] = value.numpy()
# FIXME: for more elegent coding
if 'mask' in outs and 'bbox' in outs:
mask_resolution = self.model.mask_post_process.mask_resolution
from ppdet.py_op.post_process import mask_post_process
outs['mask'] = mask_post_process(outs, outs['im_shape'],
outs['scale_factor'],
mask_resolution)
batch_res = get_infer_results(outs, clsid2catid)
bbox_num = outs['bbox_num']
start = 0
......
......@@ -25,15 +25,13 @@ logger = setup_logger(__name__)
__all__ = ['get_categories']
def get_categories(metric_type, anno_file=None, with_background=True):
def get_categories(metric_type, anno_file=None):
"""
Get class id to category id map and category id
to category name map from annotation file.
Args:
anno_file (str): annotation file path
with_background (bool, default True):
whether load background as class 0.
"""
if metric_type.lower() == 'coco':
if anno_file and os.path.isfile(anno_file):
......@@ -43,21 +41,14 @@ def get_categories(metric_type, anno_file=None, with_background=True):
coco = COCO(anno_file)
cats = coco.loadCats(coco.getCatIds())
clsid2catid = {
i + int(with_background): cat['id']
for i, cat in enumerate(cats)
}
clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
catid2name = {cat['id']: cat['name'] for cat in cats}
if with_background:
clsid2catid.update({0: 0})
catid2name.update({0: 'background'})
return clsid2catid, catid2name
# anno file not exist, load default categories of COCO17
else:
return _coco17_category(with_background)
return _coco17_category()
elif metric_type.lower() == 'voc':
if anno_file and os.path.isfile(anno_file):
......@@ -66,9 +57,7 @@ def get_categories(metric_type, anno_file=None, with_background=True):
for line in f.readlines():
cats.append(line.strip())
if cats[0] != 'background' and with_background:
cats.insert(0, 'background')
if cats[0] == 'background' and not with_background:
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
......@@ -79,25 +68,22 @@ def get_categories(metric_type, anno_file=None, with_background=True):
# anno file not exist, load default categories of
# VOC all 20 categories
else:
return _vocall_category(with_background)
return _vocall_category()
elif metric_type.lower() == 'oid':
if anno_file and os.path.isfile(anno_file):
logger.warn("only default categories support for OID19")
return _oid19_category(with_background)
return _oid19_category()
else:
raise ValueError("unknown metric type {}".format(metric_type))
def _coco17_category(with_background=True):
def _coco17_category():
"""
Get class id to category id map and category id
to category name map of COCO2017 dataset
Args:
with_background (bool, default True):
whether load background as class 0.
"""
clsid2catid = {
1: 1,
......@@ -266,39 +252,30 @@ def _coco17_category(with_background=True):
90: 'toothbrush'
}
if not with_background:
clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
catid2name.pop(0)
else:
clsid2catid.update({0: 0})
return clsid2catid, catid2name
def _vocall_category(with_background=True):
def _vocall_category():
"""
Get class id to category id map and category id
to category name map of mixup voc dataset
Args:
with_background (bool, default True):
whether load background as class 0.
"""
label_map = pascalvoc_label(with_background)
label_map = pascalvoc_label()
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
if with_background:
cats.insert(0, 'background')
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _oid19_category(with_background=True):
clsid2catid = {k: k for k in range(1, 501)}
def _oid19_category():
clsid2catid = {k: k + 1 for k in range(500)}
catid2name = {
0: "background",
......@@ -804,6 +781,4 @@ def _oid19_category(with_background=True):
500: "Toilet",
}
if not with_background:
clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
return clsid2catid, catid2name
......@@ -38,17 +38,17 @@ def get_infer_results(outs, catid):
)
im_id = outs['im_id']
im_shape = outs['im_shape']
scale_factor = outs['scale_factor']
infer_res = {}
if 'bbox' in outs:
infer_res['bbox'] = get_det_res(outs['bbox'], outs['bbox_num'], im_id,
infer_res['bbox'] = get_det_res(outs['bbox'], outs['score'],
outs['label'], outs['bbox_num'], im_id,
catid)
if 'mask' in outs:
# mask post process
infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox_num'], im_id,
infer_res['mask'] = get_seg_res(outs['mask'], outs['score'],
outs['label'], outs['bbox_num'], im_id,
catid)
if 'segm' in outs:
......
......@@ -49,14 +49,11 @@ class Metric(paddle.metric.Metric):
class COCOMetric(Metric):
def __init__(self, anno_file, with_background=True, mask_resolution=None):
def __init__(self, anno_file):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
self.anno_file = anno_file
self.with_background = with_background
self.mask_resolution = mask_resolution
self.clsid2catid, self.catid2name = get_categories('COCO', anno_file,
with_background)
self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
self.reset()
......@@ -71,16 +68,9 @@ class COCOMetric(Metric):
for k, v in outputs.items():
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
# some input fields also needed
for k in ['im_id', 'scale_factor', 'im_shape']:
v = inputs[k]
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
if 'mask' in outs and 'bbox' in outs:
from ppdet.py_op.post_process import mask_post_process
outs['mask'] = mask_post_process(outs, outs['im_shape'],
outs['scale_factor'],
self.mask_resolution)
im_id = inputs['im_id']
outs['im_id'] = im_id.numpy() if isinstance(im_id,
paddle.Tensor) else im_id
infer_results = get_infer_results(outs, self.clsid2catid)
self.results['bbox'] += infer_results[
......@@ -131,7 +121,6 @@ class COCOMetric(Metric):
class VOCMetric(Metric):
def __init__(self,
anno_file,
with_background=True,
class_num=20,
overlap_thresh=0.5,
map_type='11point',
......@@ -140,9 +129,7 @@ class VOCMetric(Metric):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
self.anno_file = anno_file
self.with_background = with_background
self.clsid2catid, self.catid2name = get_categories('VOC', anno_file,
with_background)
self.clsid2catid, self.catid2name = get_categories('VOC', anno_file)
self.overlap_thresh = overlap_thresh
self.map_type = map_type
......
from . import ops
from . import bbox
from . import mask
from . import backbones
from . import necks
from . import proposal_generator
from . import heads
from . import losses
from . import architectures
......@@ -11,10 +10,9 @@ from . import layers
from . import utils
from .ops import *
from .bbox import *
from .mask import *
from .backbones import *
from .necks import *
from .proposal_generator import *
from .heads import *
from .losses import *
from .architectures import *
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FasterRCNN']
......@@ -12,91 +26,86 @@ __all__ = ['FasterRCNN']
@register
class FasterRCNN(BaseArch):
__category__ = 'architecture'
__inject__ = [
'anchor', 'proposal', 'backbone', 'neck', 'rpn_head', 'bbox_head',
'bbox_post_process'
]
__inject__ = ['bbox_post_process']
def __init__(self,
anchor,
proposal,
backbone,
rpn_head,
bbox_head,
bbox_post_process,
neck=None):
"""
backbone (nn.Layer): backbone instance.
rpn_head (nn.Layer): generates proposals using backbone features.
bbox_head (nn.Layer): a head that performs per-region computation.
mask_head (nn.Layer): generates mask from bbox and backbone features.
"""
super(FasterRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process
self.neck = neck
def model_arch(self):
# Backbone
body_feats = self.backbone(self.inputs)
spatial_scale = 0.0625
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
}
# Neck
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN
# rpn_head returns two list: rpn_feat, rpn_head_out
# each element in rpn_feats contains rpn feature on each level,
# and the length is 1 when the neck is not applied.
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta)
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor
# anchor_out returns a list,
# each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
return rpn_loss, bbox_loss
else:
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
# Proposal RoI
# compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out,
self.training)
# BBox Head
bbox_feat, self.bbox_head_out, self.bbox_head_feat_func = self.bbox_head(
body_feats, rois, spatial_scale)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
im_shape, scale_factor)
if not self.training:
bbox_pred, bboxes = self.bbox_head.get_prediction(
self.bbox_head_out, rois)
# Refine bbox by the output from bbox_head at test stage
self.bboxes = self.bbox_post_process(bbox_pred, bboxes,
self.inputs['im_shape'],
self.inputs['scale_factor'])
else:
# Proposal RoI for Mask branch
# bboxes update at training stage only
bbox_targets = self.proposal.get_targets()[0]
# rescale the prediction back to origin image
bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
im_shape, scale_factor)
return bbox_pred, bbox_num
def get_loss(self, ):
rpn_loss, bbox_loss = self._forward()
loss = {}
# RPN loss
rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs)
loss.update(loss_rpn)
# BBox loss
bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox)
loss.update(rpn_loss)
loss.update(bbox_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox, bbox_num = self.bboxes
bbox_pred, bbox_num = self._forward()
label = bbox_pred[:, 0]
score = bbox_pred[:, 1]
bbox = bbox_pred[:, 2:]
output = {
'bbox': bbox,
'bbox_num': bbox_num,
'score': score,
'label': label,
'bbox_num': bbox_num
}
return output
......@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['MaskRCNN']
......@@ -27,22 +27,11 @@ __all__ = ['MaskRCNN']
class MaskRCNN(BaseArch):
__category__ = 'architecture'
__inject__ = [
'anchor',
'proposal',
'mask',
'backbone',
'neck',
'rpn_head',
'bbox_head',
'mask_head',
'bbox_post_process',
'mask_post_process',
]
def __init__(self,
anchor,
proposal,
mask,
backbone,
rpn_head,
bbox_head,
......@@ -50,95 +39,99 @@ class MaskRCNN(BaseArch):
bbox_post_process,
mask_post_process,
neck=None):
"""
backbone (nn.Layer): backbone instance.
rpn_head (nn.Layer): generates proposals using backbone features.
bbox_head (nn.Layer): a head that performs per-region computation.
mask_head (nn.Layer): generates mask from bbox and backbone features.
"""
super(MaskRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.mask = mask
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.mask_head = mask_head
self.bbox_post_process = bbox_post_process
self.mask_post_process = mask_post_process
def model_arch(self):
# Backbone
body_feats = self.backbone(self.inputs)
spatial_scale = 1. / 16
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
out_shape = neck and out_shape or bbox_head.get_head().out_shape
kwargs = {'input_shape': out_shape}
mask_head = create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
"mask_head": mask_head,
}
# Neck
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN
# rpn_head returns two list: rpn_feat, rpn_head_out
# each element in rpn_feats contains rpn feature on each level,
# and the length is 1 when the neck is not applied.
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta)
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor
# anchor_out returns a list,
# each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
# Proposal RoI
# compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out,
self.training)
# BBox Head
bbox_feat, self.bbox_head_out, bbox_head_feat_func = self.bbox_head(
body_feats, rois, spatial_scale)
rois_has_mask_int32 = None
if not self.training:
bbox_pred, bboxes = self.bbox_head.get_prediction(
self.bbox_head_out, rois)
# Refine bbox by the output from bbox_head at test stage
self.bboxes = self.bbox_post_process(bbox_pred, bboxes,
self.inputs['im_shape'],
self.inputs['scale_factor'])
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
rois, rois_num = self.bbox_head.get_assigned_rois()
bbox_targets = self.bbox_head.get_assigned_targets()
# Mask Head needs bbox_feat in Mask RCNN
mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
bbox_targets, bbox_feat)
return rpn_loss, bbox_loss, mask_loss
else:
# Proposal RoI for Mask branch
# bboxes update at training stage only
bbox_targets = self.proposal.get_targets()[0]
self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(
self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale, bbox_head_feat_func)
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)
def get_loss(self, ):
loss = {}
# RPN loss
rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs)
loss.update(loss_rpn)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
# BBox loss
bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox)
bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
im_shape, scale_factor)
mask_out = self.mask_head(
body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
# Mask loss
mask_targets = self.mask.get_targets()
loss_mask = self.mask_head.get_loss(self.mask_head_out, mask_targets)
loss.update(loss_mask)
# rescale the prediction back to origin image
bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
im_shape, scale_factor)
origin_shape = self.bbox_post_process.get_origin_shape()
mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
bbox_num, origin_shape)
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
bbox_loss, mask_loss, rpn_loss = self._forward()
loss = {}
loss.update(rpn_loss)
loss.update(bbox_loss)
loss.update(mask_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox, bbox_num = self.bboxes
bbox_pred, bbox_num, mask_pred = self._forward()
label = bbox_pred[:, 0]
score = bbox_pred[:, 1]
bbox = bbox_pred[:, 2:]
output = {
'label': label,
'score': score,
'bbox': bbox,
'bbox_num': bbox_num,
'mask': self.mask_head_out
'mask': mask_pred,
}
return output
......@@ -31,8 +31,8 @@ class BaseArch(nn.Layer):
inputs[k] = data[i]
return inputs
def model_arch(self):
raise NotImplementedError("Should implement model_arch method!")
def model_arch(self, ):
pass
def get_loss(self, ):
raise NotImplementedError("Should implement get_loss method!")
......
......@@ -13,15 +13,16 @@
# limitations under the License.
import math
from numbers import Integral
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from ppdet.core.workspace import register, serializable
from paddle.regularizer import L2Decay
from .name_adapter import NameAdapter
from numbers import Integral
from ppdet.modeling.layers import DeformableConvV2
from .name_adapter import NameAdapter
from ..shape_spec import ShapeSpec
__all__ = ['ResNet', 'Res5Head']
......@@ -62,7 +63,7 @@ class ConvNormLayer(nn.Layer):
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
weight_attr=paddle.ParamAttr(
learning_rate=lr, name=name + "_weights"),
bias_attr=False)
else:
......@@ -73,19 +74,19 @@ class ConvNormLayer(nn.Layer):
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
learning_rate=lr, name=name + "_weights"),
weight_attr=paddle.ParamAttr(
learning_rate=lr, name=name + '_weights'),
bias_attr=False,
name=name)
bn_name = name_adapter.fix_conv_norm_name(name)
norm_lr = 0. if freeze_norm else lr
param_attr = ParamAttr(
param_attr = paddle.ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_scale",
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
bias_attr = paddle.ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_offset",
......@@ -483,10 +484,12 @@ class ResNet(nn.Layer):
lr=1.0,
name=_name))
self.pool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
ch_in_list = [64, 256, 512, 1024]
ch_out_list = [64, 128, 256, 512]
self.expansion = 4 if depth >= 50 else 1
self._out_channels = [self.expansion * v for v in ch_out_list]
self._out_strides = [4, 8, 16, 32]
self.res_layers = []
for i in range(num_stages):
......@@ -514,10 +517,18 @@ class ResNet(nn.Layer):
dcn_v2=(i in self.dcn_v2_stages)))
self.res_layers.append(res_layer)
@property
def out_shape(self):
return [
ShapeSpec(
channels=self._out_channels[i], stride=self._out_strides[i])
for i in self.return_idx
]
def forward(self, inputs):
x = inputs['image']
conv1 = self.conv1(x)
x = self.pool(conv1)
x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
outs = []
for idx, stage in enumerate(self.res_layers):
x = stage(x)
......@@ -530,16 +541,24 @@ class ResNet(nn.Layer):
@register
class Res5Head(nn.Layer):
def __init__(self, depth=50, feat_in=1024, feat_out=512):
def __init__(self, depth=50):
super(Res5Head, self).__init__()
feat_in, feat_out = [1024, 512]
if depth < 50:
feat_in = 256
na = NameAdapter(self)
self.res5_conv = []
self.res5 = self.add_sublayer(
'res5_roi_feat',
Blocks(
depth, feat_in, feat_out, count=3, name_adapter=na,
stage_num=5))
self.feat_out = feat_out * 4
self.feat_out = feat_out if depth < 50 else feat_out * 4
@property
def out_shape(self):
return [ShapeSpec(
channels=self.feat_out,
stride=32, )]
def forward(self, roi_feat, stage=0):
y = self.res5(roi_feat)
......
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from . import ops
@register
class Anchor(object):
    """Generates RPN anchors per feature level and assembles the flattened
    prediction/anchor tensors used to compute RPN training targets.

    Both collaborators are injected from the config:
        anchor_generator: produces an (anchor, variance) pair for one
            feature map; the level index is passed so it can pick
            level-specific anchor parameters.
        anchor_target_generator: matches anchors against ground-truth boxes
            and emits sampled classification/regression targets.
    """
    __inject__ = ['anchor_generator', 'anchor_target_generator']

    def __init__(self, anchor_generator, anchor_target_generator):
        super(Anchor, self).__init__()
        self.anchor_generator = anchor_generator
        self.anchor_target_generator = anchor_target_generator

    def __call__(self, rpn_feats):
        # One (anchor, var) pair per pyramid level, in level order.
        anchors = []
        num_level = len(rpn_feats)
        for i, rpn_feat in enumerate(rpn_feats):
            anchor, var = self.anchor_generator(rpn_feat, i)
            anchors.append((anchor, var))
        return anchors

    def _get_target_input(self, rpn_feats, anchors):
        # Flatten the per-level NCHW RPN outputs into (N, num_anchors, 1)
        # scores and (N, num_anchors, 4) deltas, then concatenate across
        # levels so targets can be computed in a single pass.
        rpn_score_list = []
        rpn_delta_list = []
        anchor_list = []
        for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_feats, anchors):
            # NCHW -> NHWC so the per-location predictions become contiguous.
            rpn_score = paddle.transpose(rpn_score, perm=[0, 2, 3, 1])
            rpn_delta = paddle.transpose(rpn_delta, perm=[0, 2, 3, 1])
            # shape dim 0 keeps the batch dimension in Paddle's reshape.
            rpn_score = paddle.reshape(x=rpn_score, shape=(0, -1, 1))
            rpn_delta = paddle.reshape(x=rpn_delta, shape=(0, -1, 4))
            anchor = paddle.reshape(anchor, shape=(-1, 4))
            var = paddle.reshape(var, shape=(-1, 4))
            rpn_score_list.append(rpn_score)
            rpn_delta_list.append(rpn_delta)
            anchor_list.append(anchor)
        rpn_scores = paddle.concat(rpn_score_list, axis=1)
        rpn_deltas = paddle.concat(rpn_delta_list, axis=1)
        anchors = paddle.concat(anchor_list)
        return rpn_scores, rpn_deltas, anchors

    def generate_loss_inputs(self, inputs, rpn_head_out, anchors):
        """Build the dict of predictions/targets consumed by the RPN loss.

        `rpn_head_out` and `anchors` must be parallel per-level lists.
        """
        if len(rpn_head_out) != len(anchors):
            raise ValueError(
                "rpn_head_out and anchors should have same length, "
                " but received rpn_head_out' length is {} and anchors' "
                " length is {}".format(len(rpn_head_out), len(anchors)))
        rpn_score, rpn_delta, anchors = self._get_target_input(rpn_head_out,
                                                               anchors)

        # Sampled predictions and their matched targets/weights.
        score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator(
            bbox_pred=rpn_delta,
            cls_logits=rpn_score,
            anchor_box=anchors,
            gt_boxes=inputs['gt_bbox'],
            is_crowd=inputs['is_crowd'],
            im_info=inputs['im_info'])
        outs = {
            'rpn_score_pred': score_pred,
            'rpn_score_target': score_tgt,
            'rpn_rois_pred': roi_pred,
            'rpn_rois_target': roi_tgt,
            'rpn_rois_weight': roi_weight
        }
        return outs
@register
class Proposal(object):
    """Turns RPN outputs into RoIs and, at training time, assigns sampled
    classification/regression targets to them.

    Supports cascaded refinement: at stage 0 proposals come from the RPN;
    at later stages the previous stage's boxes are refined with the bbox
    head's delta predictions.

    Injected from the config:
        proposal_generator: decodes/filters RPN predictions into RoIs.
        proposal_target_generator: samples RoIs and builds training targets.
    """
    __inject__ = ['proposal_generator', 'proposal_target_generator']

    def __init__(self, proposal_generator, proposal_target_generator):
        super(Proposal, self).__init__()
        self.proposal_generator = proposal_generator
        self.proposal_target_generator = proposal_target_generator

    def generate_proposal(self, inputs, rpn_head_out, anchor_out, is_train):
        """Decode per-level RPN predictions into a single set of RoIs."""
        # TODO: delete im_info (legacy key; newer pipelines use im_shape)
        try:
            im_shape = inputs['im_info']
        except:
            im_shape = inputs['im_shape']
        rpn_rois_list = []
        rpn_prob_list = []
        rpn_rois_num_list = []
        for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_head_out,
                                                         anchor_out):
            # RPN objectness is a single sigmoid logit per anchor.
            rpn_prob = F.sigmoid(rpn_score)
            rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = self.proposal_generator(
                scores=rpn_prob,
                bbox_deltas=rpn_delta,
                anchors=anchor,
                variances=var,
                im_shape=im_shape,
                is_train=is_train)
            # Single level (no FPN): no cross-level merging is needed.
            if len(rpn_head_out) == 1:
                return rpn_rois, rpn_rois_num
            rpn_rois_list.append(rpn_rois)
            rpn_prob_list.append(rpn_rois_prob)
            rpn_rois_num_list.append(rpn_rois_num)

        # Multi-level: merge proposals across FPN levels, keeping the
        # post_nms_top_n highest-scoring overall.
        start_level = 2
        end_level = start_level + len(rpn_head_out)
        rois_collect, rois_num_collect = ops.collect_fpn_proposals(
            rpn_rois_list,
            rpn_prob_list,
            start_level,
            end_level,
            post_nms_top_n,
            rois_num_per_level=rpn_rois_num_list)
        return rois_collect, rois_num_collect

    def generate_proposal_target(self,
                                 inputs,
                                 rois,
                                 rois_num,
                                 stage=0,
                                 max_overlap=None):
        """Sample RoIs and build per-RoI training targets for one stage."""
        outs = self.proposal_target_generator(
            rpn_rois=rois,
            rpn_rois_num=rois_num,
            gt_classes=inputs['gt_class'],
            is_crowd=inputs['is_crowd'],
            gt_boxes=inputs['gt_bbox'],
            im_info=inputs['im_info'],
            stage=stage,
            max_overlap=max_overlap)
        # Generator output layout: [rois, labels, bbox_targets,
        # inside_weights, outside_weights, ..., rois_num, max_overlap].
        rois = outs[0]
        max_overlap = outs[-1]
        rois_num = outs[-2]
        targets = {
            'labels_int32': outs[1],
            'bbox_targets': outs[2],
            'bbox_inside_weights': outs[3],
            'bbox_outside_weights': outs[4]
        }
        return rois, rois_num, targets, max_overlap

    def refine_bbox(self, roi, bbox_delta, stage=1):
        """Decode a later-stage delta prediction against the previous RoIs."""
        out_dim = bbox_delta.shape[1] // 4
        bbox_delta_r = paddle.reshape(bbox_delta, (-1, out_dim, 4))
        # Take the delta at class index 1 — presumably the class-agnostic
        # foreground slot of a cascade head; TODO confirm against the head.
        bbox_delta_s = paddle.slice(
            bbox_delta_r, axes=[1], starts=[1], ends=[2])
        # Later cascade stages use progressively tighter regression weights.
        reg_weights = [
            i / stage for i in self.proposal_target_generator.bbox_reg_weights
        ]
        refined_bbox = ops.box_coder(
            prior_box=roi,
            prior_box_var=reg_weights,
            target_box=bbox_delta_s,
            code_type='decode_center_size',
            box_normalized=False,
            axis=1)
        refined_bbox = paddle.reshape(refined_bbox, shape=[-1, 4])
        return refined_bbox

    def __call__(self,
                 inputs,
                 rpn_head_out,
                 anchor_out,
                 is_train=False,
                 stage=0,
                 proposal_out=None,
                 bbox_head_out=None,
                 max_overlap=None):
        """Produce RoIs for one stage; accumulates targets when training."""
        if stage == 0:
            # First stage: proposals come straight from the RPN; reset the
            # per-image target/overlap state kept for the cascade.
            roi, rois_num = self.generate_proposal(inputs, rpn_head_out,
                                                   anchor_out, is_train)
            self.targets_list = []
            self.max_overlap = None
        else:
            # Later stage: refine the previous stage's boxes with the bbox
            # head's delta output (bbox_head_out[1]).
            bbox_delta = bbox_head_out[1]
            roi = self.refine_bbox(proposal_out[0], bbox_delta, stage)
            rois_num = proposal_out[1]
        if is_train:
            roi, rois_num, targets, self.max_overlap = self.generate_proposal_target(
                inputs, roi, rois_num, stage, self.max_overlap)
            self.targets_list.append(targets)
        return roi, rois_num

    def get_targets(self):
        # One targets dict per cascade stage, in stage order.
        return self.targets_list

    def get_max_overlap(self):
        # Max RoI/GT overlap from the most recent target assignment.
        return self.max_overlap
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
import paddle
def bbox2delta(src_boxes, tgt_boxes, weights):
    """Encode target boxes as regression deltas relative to source boxes.

    Args:
        src_boxes: (N, 4) boxes in [x1, y1, x2, y2] (e.g. proposals/anchors).
        tgt_boxes: (N, 4) matched ground-truth boxes, same layout.
        weights: 4-sequence (wx, wy, ww, wh) scaling the four delta channels.

    Returns:
        (N, 4) tensor of (dx, dy, dw, dh) deltas.
    """
    wx, wy, ww, wh = weights

    src_w = src_boxes[:, 2] - src_boxes[:, 0]
    src_h = src_boxes[:, 3] - src_boxes[:, 1]
    src_cx = src_boxes[:, 0] + 0.5 * src_w
    src_cy = src_boxes[:, 1] + 0.5 * src_h

    tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0]
    tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1]
    tgt_cx = tgt_boxes[:, 0] + 0.5 * tgt_w
    tgt_cy = tgt_boxes[:, 1] + 0.5 * tgt_h

    # Center offsets are normalized by the source size; sizes are log-ratios.
    return paddle.stack(
        (wx * (tgt_cx - src_cx) / src_w,
         wy * (tgt_cy - src_cy) / src_h,
         ww * paddle.log(tgt_w / src_w),
         wh * paddle.log(tgt_h / src_h)),
        axis=1)
def delta2bbox(deltas, boxes, weights):
    """Decode regression deltas back into absolute [x1, y1, x2, y2] boxes.

    Args:
        deltas: (N, 4*K) per-class deltas laid out as x/y/w/h every 4 columns.
        boxes: (N, 4) reference boxes.
        weights: 4-sequence (wx, wy, ww, wh) dividing the delta channels.

    Returns:
        (N, 4*K) decoded boxes, same layout as `deltas`.
    """
    # Cap on dw/dh so paddle.exp cannot overflow for extreme predictions.
    clip_scale = math.log(1000.0 / 16)
    if boxes.shape[0] == 0:
        # No references: return an empty tensor with the expected width.
        return paddle.zeros((0, deltas.shape[1]), dtype='float32')

    wx, wy, ww, wh = weights

    ref_w = boxes[:, 2] - boxes[:, 0]
    ref_h = boxes[:, 3] - boxes[:, 1]
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    dx = deltas[:, 0::4] / wx
    dy = deltas[:, 1::4] / wy
    dw = paddle.clip(deltas[:, 2::4] / ww, max=clip_scale)
    dh = paddle.clip(deltas[:, 3::4] / wh, max=clip_scale)

    # Broadcast the (N,) reference geometry against the (N, K) deltas.
    pred_ctr_x = dx * ref_w.unsqueeze(1) + ref_cx.unsqueeze(1)
    pred_ctr_y = dy * ref_h.unsqueeze(1) + ref_cy.unsqueeze(1)
    pred_w = paddle.exp(dw) * ref_w.unsqueeze(1)
    pred_h = paddle.exp(dh) * ref_h.unsqueeze(1)

    pred_boxes = paddle.zeros_like(deltas)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes
def expand_bbox(bboxes, scale):
    """Scale boxes about their centers by `scale` (numpy in, numpy out).

    Args:
        bboxes: (N, 4) array of [x1, y1, x2, y2] boxes.
        scale: multiplicative factor applied to width and height.

    Returns:
        (N, 4) float32 array of the expanded boxes.
    """
    half_w = (bboxes[:, 2] - bboxes[:, 0]) * .5 * scale
    half_h = (bboxes[:, 3] - bboxes[:, 1]) * .5 * scale
    ctr_x = (bboxes[:, 2] + bboxes[:, 0]) * .5
    ctr_y = (bboxes[:, 3] + bboxes[:, 1]) * .5

    expanded = np.zeros(bboxes.shape, dtype=np.float32)
    expanded[:, 0] = ctr_x - half_w
    expanded[:, 1] = ctr_y - half_h
    expanded[:, 2] = ctr_x + half_w
    expanded[:, 3] = ctr_y + half_h
    return expanded
def clip_bbox(boxes, im_shape):
    """Clamp boxes to lie inside an image of size im_shape = (h, w).

    Args:
        boxes: (N, 4) tensor of [x1, y1, x2, y2] boxes.
        im_shape: (height, width) of the image.

    Returns:
        (N, 4) tensor with x-coords in [0, w] and y-coords in [0, h].
    """
    h, w = im_shape
    clipped = [
        boxes[:, 0].clip(0, w),  # x1
        boxes[:, 1].clip(0, h),  # y1
        boxes[:, 2].clip(0, w),  # x2
        boxes[:, 3].clip(0, h),  # y2
    ]
    return paddle.stack(clipped, axis=1)
def nonempty_bbox(boxes, min_size=0, return_mask=False):
    """Filter out degenerate boxes whose width or height is <= min_size.

    Bug fix: the original tested the width twice
    (`logical_and(w > min_size, w > min_size)`) and never used `h`, so
    zero-height boxes slipped through the filter.

    Args:
        boxes: (N, 4) tensor of [x1, y1, x2, y2] boxes.
        min_size: both sides must be strictly greater than this value.
        return_mask: if True, return the (N,) boolean mask instead of indices.

    Returns:
        (N,) bool mask when return_mask is True, otherwise a 1-D tensor of
        the indices of the boxes to keep.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    mask = paddle.logical_and(w > min_size, h > min_size)
    if return_mask:
        return mask
    keep = paddle.nonzero(mask).flatten()
    return keep
def bbox_area(boxes):
    """Return per-box areas for (N, 4) [x1, y1, x2, y2] boxes (no +1)."""
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
def bbox_overlaps(boxes1, boxes2):
    """Compute the pairwise IoU matrix between two sets of boxes.

    Args:
        boxes1: (M, 4) tensor of [x1, y1, x2, y2] boxes.
        boxes2: (N, 4) tensor of [x1, y1, x2, y2] boxes.

    Returns:
        (M, N) tensor where entry (i, j) is IoU(boxes1[i], boxes2[j]).
    """
    area1 = bbox_area(boxes1)
    area2 = bbox_area(boxes2)

    # Broadcast boxes1 (M, 1, 2) against boxes2 (N, 2) to get the (M, N, 2)
    # intersection rectangle corners.
    xy_max = paddle.minimum(
        paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:])
    xy_min = paddle.maximum(
        paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2])
    width_height = xy_max - xy_min
    # Non-overlapping pairs yield negative extents; clamp them to zero.
    width_height = width_height.clip(min=0)
    inter = width_height.prod(axis=2)

    # where() guards the division: pairs with zero intersection get IoU 0
    # rather than whatever inter/union evaluates to for them.
    overlaps = paddle.where(inter > 0, inter /
                            (paddle.unsqueeze(area1, 1) + area2 - inter),
                            paddle.zeros_like(inter))
    return overlaps
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import rpn_head
from . import bbox_head
from . import mask_head
from . import yolo_head
......@@ -22,7 +21,6 @@ from . import fcos_head
from . import solov2_head
from . import ttf_head
from .rpn_head import *
from .bbox_head import *
from .mask_head import *
from .yolo_head import *
......
......@@ -13,234 +13,216 @@
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import ReLU
from paddle.nn.initializer import Normal, XavierUniform
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.core.workspace import register, create
from ppdet.modeling import ops
from .roi_extractor import RoIAlign
from ..shape_spec import ShapeSpec
from ..bbox_utils import bbox2delta
@register
class TwoFCHead(nn.Layer):
__shared__ = ['roi_stages']
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, roi_stages=1):
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7):
super(TwoFCHead, self).__init__()
self.in_dim = in_dim
self.mlp_dim = mlp_dim
self.roi_stages = roi_stages
fan = in_dim * resolution * resolution
self.fc6_list = []
self.fc6_relu_list = []
self.fc7_list = []
self.fc7_relu_list = []
for stage in range(roi_stages):
fc6_name = 'fc6_{}'.format(stage)
fc7_name = 'fc7_{}'.format(stage)
lr_factor = 2**stage
fc6 = self.add_sublayer(
fc6_name,
nn.Linear(
lr_factor = 1.
self.fc6 = nn.Linear(
in_dim * resolution * resolution,
mlp_dim,
weight_attr=ParamAttr(
weight_attr=paddle.ParamAttr(
learning_rate=lr_factor,
initializer=XavierUniform(fan_out=fan)),
bias_attr=ParamAttr(
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU())
fc7 = self.add_sublayer(
fc7_name,
nn.Linear(
initializer=XavierUniform(fan_out=fan)))
self.fc7 = nn.Linear(
mlp_dim,
mlp_dim,
weight_attr=ParamAttr(
learning_rate=lr_factor, initializer=XavierUniform()),
bias_attr=ParamAttr(
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU())
self.fc6_list.append(fc6)
self.fc6_relu_list.append(fc6_relu)
self.fc7_list.append(fc7)
self.fc7_relu_list.append(fc7_relu)
def forward(self, rois_feat, stage=0):
rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
fc6 = self.fc6_list[stage](rois_feat)
fc6_relu = self.fc6_relu_list[stage](fc6)
fc7 = self.fc7_list[stage](fc6_relu)
fc7_relu = self.fc7_relu_list[stage](fc7)
return fc7_relu
weight_attr=paddle.ParamAttr(
learning_rate=lr_factor, initializer=XavierUniform()))
@register
class BBoxFeat(nn.Layer):
__inject__ = ['roi_extractor', 'head_feat']
@classmethod
def from_config(cls, cfg, input_shape):
s = input_shape
s = s[0] if isinstance(s, (list, tuple)) else s
return {'in_dim': s.channels}
def __init__(self, roi_extractor, head_feat):
super(BBoxFeat, self).__init__()
self.roi_extractor = roi_extractor
self.head_feat = head_feat
self.rois_feat_list = []
@property
def out_shape(self):
return [ShapeSpec(channels=self.mlp_dim, )]
def forward(self, body_feats, rois, spatial_scale, stage=0):
rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_feat = self.head_feat(rois_feat, stage)
return rois_feat, bbox_feat
def forward(self, rois_feat):
rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
fc6 = self.fc6(rois_feat)
fc6 = F.relu(fc6)
fc7 = self.fc7(fc6)
fc7 = F.relu(fc7)
return fc7
@register
class BBoxHead(nn.Layer):
__shared__ = ['num_classes', 'roi_stages']
__inject__ = ['bbox_feat']
__shared__ = ['num_classes']
__inject__ = ['bbox_assigner']
"""
head (nn.Layer): Extract feature in bbox head
in_channel (int): Input channel after RoI extractor
"""
def __init__(self,
bbox_feat,
in_feat=1024,
num_classes=81,
cls_agnostic=False,
roi_stages=1,
head,
in_channel,
roi_extractor=RoIAlign().__dict__,
bbox_assigner='BboxAssigner',
with_pool=False,
score_stage=[0, 1, 2],
delta_stage=[2]):
num_classes=80,
bbox_weight=[10., 10., 5., 5.]):
super(BBoxHead, self).__init__()
self.num_classes = num_classes
self.cls_agnostic = cls_agnostic
self.delta_dim = 2 if cls_agnostic else num_classes
self.bbox_feat = bbox_feat
self.roi_stages = roi_stages
self.bbox_score_list = []
self.bbox_delta_list = []
self.roi_feat_list = [[] for i in range(roi_stages)]
self.head = head
self.roi_extractor = roi_extractor
if isinstance(roi_extractor, dict):
self.roi_extractor = RoIAlign(**roi_extractor)
self.bbox_assigner = bbox_assigner
self.with_pool = with_pool
self.score_stage = score_stage
self.delta_stage = delta_stage
for stage in range(roi_stages):
score_name = 'bbox_score_{}'.format(stage)
delta_name = 'bbox_delta_{}'.format(stage)
lr_factor = 2**stage
bbox_score = self.add_sublayer(
score_name,
nn.Linear(
in_feat,
1 * self.num_classes,
weight_attr=ParamAttr(
learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.01)),
bias_attr=ParamAttr(
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
bbox_delta = self.add_sublayer(
delta_name,
nn.Linear(
in_feat,
4 * self.delta_dim,
weight_attr=ParamAttr(
self.num_classes = num_classes
self.bbox_weight = bbox_weight
lr_factor = 1.
self.bbox_score = nn.Linear(
in_channel,
self.num_classes + 1,
weight_attr=paddle.ParamAttr(
learning_rate=lr_factor, initializer=Normal(
mean=0.0, std=0.01)))
self.bbox_delta = nn.Linear(
in_channel,
4 * self.num_classes,
weight_attr=paddle.ParamAttr(
learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.001)),
bias_attr=ParamAttr(
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
self.bbox_score_list.append(bbox_score)
self.bbox_delta_list.append(bbox_delta)
def forward(self,
body_feats=None,
rois=None,
spatial_scale=None,
stage=0,
roi_stage=-1):
if rois is not None:
rois_feat, bbox_feat = self.bbox_feat(body_feats, rois,
spatial_scale, stage)
self.roi_feat_list[stage] = rois_feat
mean=0.0, std=0.001)))
self.assigned_label = None
self.assigned_rois = None
@classmethod
def from_config(cls, cfg, input_shape):
roi_pooler = cfg['roi_extractor']
assert isinstance(roi_pooler, dict)
kwargs = RoIAlign.from_config(cfg, input_shape)
roi_pooler.update(kwargs)
kwargs = {'input_shape': input_shape}
head = create(cfg['head'], **kwargs)
return {
'roi_extractor': roi_pooler,
'head': head,
'in_channel': head.out_shape[0].channels
}
def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
"""
body_feats (list[Tensor]):
rois (Tensor):
rois_num (Tensor):
inputs (dict{Tensor}):
"""
if self.training:
rois, rois_num, _, targets = self.bbox_assigner(rois, rois_num,
inputs)
self.assigned_rois = (rois, rois_num)
self.assigned_targets = targets
rois_feat = self.roi_extractor(body_feats, rois, rois_num)
bbox_feat = self.head(rois_feat)
#if self.with_pool:
if len(bbox_feat.shape) > 2 and bbox_feat.shape[-1] > 1:
feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
feat = paddle.squeeze(feat, axis=[2, 3])
else:
rois_feat = self.roi_feat_list[roi_stage]
bbox_feat = self.bbox_feat.head_feat(rois_feat, stage)
if self.with_pool:
bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3])
scores = self.bbox_score_list[stage](bbox_feat_)
deltas = self.bbox_delta_list[stage](bbox_feat_)
feat = bbox_feat
scores = self.bbox_score(feat)
deltas = self.bbox_delta(feat)
if self.training:
loss = self.get_loss(scores, deltas, targets, rois)
return loss, bbox_feat
else:
scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat)
bbox_head_out = (scores, deltas)
return bbox_feat, bbox_head_out, self.bbox_feat.head_feat
def _get_head_loss(self, score, delta, target):
# bbox cls
labels_int64 = paddle.cast(x=target['labels_int32'], dtype='int64')
labels_int64.stop_gradient = True
loss_bbox_cls = F.softmax_with_cross_entropy(
logits=score, label=labels_int64)
loss_bbox_cls = paddle.mean(loss_bbox_cls)
pred = self.get_prediction(scores, deltas)
return pred, self.head
def get_loss(self, scores, deltas, targets, rois):
"""
scores (Tensor): scores from bbox head outputs
deltas (Tensor): deltas from bbox head outputs
targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
rois (List[Tensor]): RoIs generated in each batch
"""
# TODO: better pass args
tgt_labels, tgt_bboxes, tgt_gt_inds = targets
tgt_labels = paddle.concat(tgt_labels) if len(
tgt_labels) > 1 else tgt_labels[0]
tgt_labels = tgt_labels.cast('int64')
tgt_labels.stop_gradient = True
loss_bbox_cls = F.cross_entropy(
input=scores, label=tgt_labels, reduction='mean')
# bbox reg
loss_bbox_reg = ops.smooth_l1(
input=delta,
label=target['bbox_targets'],
inside_weight=target['bbox_inside_weights'],
outside_weight=target['bbox_outside_weights'],
sigma=1.0)
loss_bbox_reg = paddle.mean(loss_bbox_reg)
return loss_bbox_cls, loss_bbox_reg
def get_loss(self, bbox_head_out, targets):
loss_bbox = {}
cls_agnostic_bbox_reg = deltas.shape[1] == 4
fg_inds = paddle.nonzero(
paddle.logical_and(tgt_labels >= 0, tgt_labels <
self.num_classes)).flatten()
if cls_agnostic_bbox_reg:
reg_delta = paddle.gather(deltas, fg_inds)
else:
fg_gt_classes = paddle.gather(tgt_labels, fg_inds)
reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])
reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
reg_col_inds = reg_col_inds.reshape([-1, 1])
reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)
reg_delta = paddle.gather(deltas, fg_inds)
reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])
rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
tgt_bboxes = paddle.concat(tgt_bboxes) if len(
tgt_bboxes) > 1 else tgt_bboxes[0]
reg_target = bbox2delta(rois, tgt_bboxes, self.bbox_weight)
reg_target = paddle.gather(reg_target, fg_inds)
reg_target.stop_gradient = True
loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
) / tgt_labels.shape[0]
cls_name = 'loss_bbox_cls'
reg_name = 'loss_bbox_reg'
for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)):
score, delta = bboxhead
if len(targets) > 1:
cls_name = 'loss_bbox_cls_{}'.format(lvl)
reg_name = 'loss_bbox_reg_{}'.format(lvl)
loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
target)
loss_weight = 1. / 2**lvl
loss_bbox[cls_name] = loss_bbox_cls * loss_weight
loss_bbox[reg_name] = loss_bbox_reg * loss_weight
loss_bbox = {}
loss_bbox[cls_name] = loss_bbox_cls
loss_bbox[reg_name] = loss_bbox_reg
return loss_bbox
def get_prediction(self, bbox_head_out, rois):
proposal, proposal_num = rois
score, delta = bbox_head_out
def get_prediction(self, score, delta):
bbox_prob = F.softmax(score)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
bbox_pred = (delta, bbox_prob)
return bbox_pred, rois
def get_cascade_prediction(self, bbox_head_out, rois):
proposal_list = []
prob_list = []
delta_list = []
for stage in range(len(rois)):
proposals = rois[stage]
bboxhead = bbox_head_out[stage]
score, delta = bboxhead
proposal, proposal_num = proposals
if stage in self.score_stage:
if stage < 2:
_, head_out, _ = self(stage=stage, roi_stage=-1)
score = head_out[0]
return delta, bbox_prob
bbox_prob = F.softmax(score)
prob_list.append(bbox_prob)
if stage in self.delta_stage:
proposal_list.append(proposal)
delta_list.append(delta)
bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
delta = paddle.mean(paddle.stack(delta_list), axis=0)
proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
if self.cls_agnostic:
N, C, M = delta.shape
delta = delta[:, 1:2, :]
delta = paddle.expand(delta, [N, self.num_classes, M])
bboxes = (proposal, proposal_num)
bbox_pred = (delta, bbox_prob)
return bbox_pred, bboxes
def get_head(self, ):
return self.head
def get_assigned_targets(self, ):
return self.assigned_targets
def get_assigned_rois(self, ):
return self.assigned_rois
......@@ -13,195 +13,196 @@
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn import Layer, Sequential
from paddle.nn import Conv2D, Conv2DTranspose, ReLU
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.core.workspace import register, create
from ppdet.modeling import ops
from .roi_extractor import RoIAlign
@register
class MaskFeat(Layer):
__inject__ = ['mask_roi_extractor']
def __init__(self,
mask_roi_extractor=None,
num_convs=0,
feat_in=2048,
feat_out=256,
mask_num_stages=1,
share_bbox_feat=False):
@register
class MaskFeat(nn.Layer):
def __init__(self, num_convs=0, in_channels=2048, out_channels=256):
super(MaskFeat, self).__init__()
self.num_convs = num_convs
self.feat_in = feat_in
self.feat_out = feat_out
self.mask_roi_extractor = mask_roi_extractor
self.mask_num_stages = mask_num_stages
self.share_bbox_feat = share_bbox_feat
self.upsample_module = []
fan_conv = feat_out * 3 * 3
fan_deconv = feat_out * 2 * 2
for i in range(self.mask_num_stages):
name = 'stage_{}'.format(i)
mask_conv = Sequential()
for j in range(self.num_convs):
conv_name = 'mask_inter_feat_{}'.format(j + 1)
self.in_channels = in_channels
self.out_channels = out_channels
fan_conv = out_channels * 3 * 3
fan_deconv = out_channels * 2 * 2
mask_conv = nn.Sequential()
for i in range(self.num_convs):
conv_name = 'mask_inter_feat_{}'.format(i + 1)
mask_conv.add_sublayer(
conv_name,
Conv2D(
in_channels=feat_in if j == 0 else feat_out,
out_channels=feat_out,
nn.Conv2D(
in_channels=in_channels if i == 0 else out_channels,
out_channels=out_channels,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
initializer=KaimingNormal(fan_in=fan_conv)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
mask_conv.add_sublayer(conv_name + 'act', ReLU())
weight_attr=paddle.ParamAttr(
initializer=KaimingNormal(fan_in=fan_conv))))
mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
mask_conv.add_sublayer(
'conv5_mask',
Conv2DTranspose(
in_channels=self.feat_in,
out_channels=self.feat_out,
nn.Conv2DTranspose(
in_channels=self.in_channels,
out_channels=self.out_channels,
kernel_size=2,
stride=2,
weight_attr=ParamAttr(
initializer=KaimingNormal(fan_in=fan_deconv)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
mask_conv.add_sublayer('conv5_mask' + 'act', ReLU())
upsample = self.add_sublayer(name, mask_conv)
self.upsample_module.append(upsample)
weight_attr=paddle.ParamAttr(
initializer=KaimingNormal(fan_in=fan_deconv))))
mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU())
self.upsample = mask_conv
def forward(self,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage=0,
bbox_head_feat_func=None):
if self.share_bbox_feat and mask_index is not None:
rois_feat = paddle.gather(bbox_feat, mask_index)
else:
rois_feat = self.mask_roi_extractor(body_feats, bboxes,
spatial_scale)
if self.share_bbox_feat and bbox_head_feat_func is not None and not self.training:
rois_feat = bbox_head_feat_func(rois_feat)
@classmethod
def from_config(cls, cfg, input_shape):
if isinstance(input_shape, (list, tuple)):
input_shape = input_shape[0]
return {'in_channels': input_shape.channels, }
def out_channel(self):
return self.out_channels
# upsample
mask_feat = self.upsample_module[stage](rois_feat)
return mask_feat
def forward(self, feats):
return self.upsample(feats)
@register
class MaskHead(Layer):
__shared__ = ['num_classes', 'mask_num_stages']
__inject__ = ['mask_feat']
class MaskHead(nn.Layer):
__shared__ = ['num_classes']
__inject__ = ['mask_assigner']
def __init__(self,
mask_feat,
feat_in=256,
num_classes=81,
mask_num_stages=1):
head,
roi_extractor=RoIAlign().__dict__,
mask_assigner='MaskAssigner',
num_classes=80,
share_bbox_feat=False):
super(MaskHead, self).__init__()
self.mask_feat = mask_feat
self.feat_in = feat_in
self.num_classes = num_classes
self.mask_num_stages = mask_num_stages
self.mask_fcn_logits = []
for i in range(self.mask_num_stages):
name = 'mask_fcn_logits_{}'.format(i)
self.mask_fcn_logits.append(
self.add_sublayer(
name,
Conv2D(
in_channels=self.feat_in,
self.roi_extractor = roi_extractor
if isinstance(roi_extractor, dict):
self.roi_extractor = RoIAlign(**roi_extractor)
self.head = head
self.in_channels = head.out_channel()
self.mask_assigner = mask_assigner
self.share_bbox_feat = share_bbox_feat
self.bbox_head = None
self.mask_fcn_logits = nn.Conv2D(
in_channels=self.in_channels,
out_channels=self.num_classes,
kernel_size=1,
weight_attr=ParamAttr(initializer=KaimingNormal(
fan_in=self.num_classes)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.0)))))
weight_attr=paddle.ParamAttr(initializer=KaimingNormal(
fan_in=self.num_classes)))
def forward_train(self,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage=0):
# feat
mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index,
spatial_scale, stage)
# logits
mask_head_out = self.mask_fcn_logits[stage](mask_feat)
return mask_head_out
@classmethod
def from_config(cls, cfg, input_shape):
roi_pooler = cfg['roi_extractor']
assert isinstance(roi_pooler, dict)
kwargs = RoIAlign.from_config(cfg, input_shape)
roi_pooler.update(kwargs)
kwargs = {'input_shape': input_shape}
head = create(cfg['head'], **kwargs)
return {
'roi_extractor': roi_pooler,
'head': head,
}
def get_loss(self, mask_logits, mask_label, mask_target, mask_weight):
mask_label = F.one_hot(mask_label, self.num_classes).unsqueeze([2, 3])
mask_label = paddle.expand_as(mask_label, mask_logits)
mask_label.stop_gradient = True
mask_pred = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label))
shape = mask_logits.shape
mask_pred = paddle.reshape(mask_pred, [shape[0], shape[2], shape[3]])
mask_target = mask_target.cast('float32')
mask_weight = mask_weight.unsqueeze([1, 2])
loss_mask = F.binary_cross_entropy_with_logits(
mask_pred, mask_target, weight=mask_weight, reduction="mean")
return loss_mask
def forward_train(self, body_feats, rois, rois_num, inputs, targets,
bbox_feat):
"""
body_feats (list[Tensor]): Multi-level backbone features
rois (list[Tensor]): Proposals for each batch with shape [N, 4]
rois_num (Tensor): The number of proposals for each batch
inputs (dict): ground truth info
"""
#assert self.bbox_head
tgt_labels, _, tgt_gt_inds = targets
rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner(
rois, tgt_labels, tgt_gt_inds, inputs)
if self.share_bbox_feat:
rois_feat = paddle.gather(bbox_feat, mask_index)
else:
rois_feat = self.roi_extractor(body_feats, rois, rois_num)
mask_feat = self.head(rois_feat)
mask_logits = self.mask_fcn_logits(mask_feat)
loss_mask = self.get_loss(mask_logits, tgt_classes, tgt_masks,
tgt_weights)
return {'loss_mask': loss_mask}
def forward_test(self,
scale_factor,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage=0,
bbox_head_feat_func=None):
bbox, bbox_num = bboxes
if bbox.shape[0] == 0:
mask_head_out = paddle.full([1, 6], -1)
rois,
rois_num,
scale_factor,
feat_func=None):
"""
body_feats (list[Tensor]): Multi-level backbone features
rois (Tensor): Prediction from bbox head with shape [N, 6]
rois_num (Tensor): The number of prediction for each batch
scale_factor (Tensor): The scale factor from origin size to input size
"""
if rois.shape[0] == 0:
mask_out = paddle.full([1, 1, 1, 1], -1)
else:
bbox = [rois[:, 2:]]
labels = rois[:, 0].cast('int32')
rois_feat = self.roi_extractor(body_feats, bbox, rois_num)
if self.share_bbox_feat:
assert feat_func is not None
rois_feat = feat_func(rois_feat)
mask_feat = self.head(rois_feat)
mask_logit = self.mask_fcn_logits(mask_feat)
mask_num_class = mask_logit.shape[1]
if mask_num_class == 1:
mask_out = F.sigmoid(mask_logit)
else:
scale_factor_list = []
for idx in range(bbox_num.shape[0]):
num = bbox_num[idx]
scale = scale_factor[idx, 0]
ones = paddle.ones(num)
scale_expand = ones * scale
scale_factor_list.append(scale_expand)
scale_factor_list = paddle.cast(
paddle.concat(scale_factor_list), 'float32')
scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1])
scaled_bbox = paddle.multiply(bbox[:, 2:], scale_factor_list)
scaled_bboxes = (scaled_bbox, bbox_num)
mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat,
mask_index, spatial_scale, stage,
bbox_head_feat_func)
mask_logit = self.mask_fcn_logits[stage](mask_feat)
mask_head_out = F.sigmoid(mask_logit)
return mask_head_out
num_masks = mask_logit.shape[0]
pred_masks = paddle.split(mask_logit, num_masks)
mask_out = []
# TODO: need to optimize gather
for i, pred_mask in enumerate(pred_masks):
mask = paddle.gather(pred_mask, labels[i], axis=1)
mask_out.append(mask)
mask_out = F.sigmoid(paddle.concat(mask_out))
return mask_out
def forward(self,
inputs,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
bbox_head_feat_func=None,
stage=0):
rois,
rois_num,
inputs,
targets=None,
bbox_feat=None,
feat_func=None):
if self.training:
mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
mask_index, spatial_scale, stage)
return self.forward_train(body_feats, rois, rois_num, inputs,
targets, bbox_feat)
else:
scale_factor = inputs['scale_factor']
mask_head_out = self.forward_test(
scale_factor, body_feats, bboxes, bbox_feat, mask_index,
spatial_scale, stage, bbox_head_feat_func)
return mask_head_out
def get_loss(self, mask_head_out, mask_target):
mask_logits = paddle.flatten(mask_head_out, start_axis=1, stop_axis=-1)
mask_label = paddle.cast(x=mask_target, dtype='float32')
mask_label.stop_gradient = True
loss_mask = ops.sigmoid_cross_entropy_with_logits(
input=mask_logits,
label=mask_label,
ignore_index=-1,
normalize=True)
loss_mask = paddle.sum(loss_mask)
return {'loss_mask': loss_mask}
im_scale = inputs['scale_factor']
return self.forward_test(body_feats, rois, rois_num, im_scale,
feat_func)
......@@ -17,32 +17,47 @@ from ppdet.core.workspace import register
from ppdet.modeling import ops
def _to_list(v):
if not isinstance(v, (list, tuple)):
return [v]
return v
@register
class RoIAlign(object):
def __init__(self,
resolution=14,
spatial_scale=0.0625,
sampling_ratio=0,
canconical_level=4,
canonical_size=224,
start_level=0,
end_level=3):
end_level=3,
aligned=False):
super(RoIAlign, self).__init__()
self.resolution = resolution
self.spatial_scale = _to_list(spatial_scale)
self.sampling_ratio = sampling_ratio
self.canconical_level = canconical_level
self.canonical_size = canonical_size
self.start_level = start_level
self.end_level = end_level
self.aligned = aligned
def __call__(self, feats, rois, spatial_scale):
roi, rois_num = rois
if self.start_level == self.end_level:
@classmethod
def from_config(cls, cfg, input_shape):
return {'spatial_scale': [1. / i.stride for i in input_shape]}
def __call__(self, feats, roi, rois_num):
roi = paddle.concat(roi) if len(roi) > 1 else roi[0]
if len(feats) == 1:
rois_feat = ops.roi_align(
feats[self.start_level],
roi,
self.resolution,
spatial_scale,
rois_num=rois_num)
self.spatial_scale[0],
rois_num=rois_num,
aligned=self.aligned)
else:
offset = 2
k_min = self.start_level + offset
......@@ -60,9 +75,11 @@ class RoIAlign(object):
feats[lvl],
rois_dist[lvl],
self.resolution,
spatial_scale[lvl],
self.spatial_scale[lvl],
sampling_ratio=self.sampling_ratio,
rois_num=rois_num_dist[lvl])
rois_num=rois_num_dist[lvl],
aligned=self.aligned)
if roi_feat.shape[0] > 0:
rois_feat_list.append(roi_feat)
rois_feat_shuffle = paddle.concat(rois_feat_list)
rois_feat = paddle.gather(rois_feat_shuffle, restore_index)
......
......@@ -27,9 +27,9 @@ from paddle.nn.initializer import Normal, Constant
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target
from ppdet.py_op.post_process import bbox_post_process
from ppdet.modeling.bbox_utils import delta2bbox
from . import ops
from paddle.vision.ops import DeformConv2D
......@@ -223,53 +223,6 @@ class AnchorGeneratorRPN(object):
return anchor, var
@register
@serializable
class AnchorTargetGeneratorRPN(object):
    """Assign training targets to RPN anchors and gather the matching predictions.

    Delegates sampling/matching to the numpy helper
    `generate_rpn_anchor_target`, then gathers the predicted logits and
    box deltas at the sampled anchor indices so the RPN loss can be
    computed directly on the returned tensors.
    """

    def __init__(self,
                 batch_size_per_im=256,
                 straddle_thresh=0.,
                 fg_fraction=0.5,
                 positive_overlap=0.7,
                 negative_overlap=0.3,
                 use_random=True):
        super(AnchorTargetGeneratorRPN, self).__init__()
        self.batch_size_per_im = batch_size_per_im  # anchors sampled per image
        self.straddle_thresh = straddle_thresh  # tolerance for anchors crossing image border
        self.fg_fraction = fg_fraction  # target fraction of positives in the sample
        self.positive_overlap = positive_overlap  # IoU at/above which an anchor is positive
        self.negative_overlap = negative_overlap  # IoU below which an anchor is negative
        self.use_random = use_random  # randomize subsampling when True

    def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd,
                 im_info):
        # The target generator is implemented in numpy, so convert the
        # input tensors first (assumes eager/dygraph execution).
        anchor_box = anchor_box.numpy()
        gt_boxes = gt_boxes.numpy()
        is_crowd = is_crowd.numpy()
        im_info = im_info.numpy()
        loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target(
            anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh,
            self.batch_size_per_im, self.positive_overlap,
            self.negative_overlap, self.fg_fraction, self.use_random)

        loc_indexes = to_tensor(loc_indexes)
        score_indexes = to_tensor(score_indexes)
        tgt_labels = to_tensor(tgt_labels)
        tgt_bboxes = to_tensor(tgt_bboxes)
        bbox_inside_weights = to_tensor(bbox_inside_weights)

        # Targets and sampled indices are constants w.r.t. the network.
        loc_indexes.stop_gradient = True
        score_indexes.stop_gradient = True
        tgt_labels.stop_gradient = True

        # Flatten the prediction maps, then pick out the sampled anchors:
        # classification logits at score_indexes, box deltas at loc_indexes.
        cls_logits = paddle.reshape(x=cls_logits, shape=(-1, ))
        bbox_pred = paddle.reshape(x=bbox_pred, shape=(-1, 4))
        pred_cls_logits = paddle.gather(cls_logits, score_indexes)
        pred_bbox_pred = paddle.gather(bbox_pred, loc_indexes)

        return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights
@register
@serializable
class AnchorGeneratorSSD(object):
......@@ -335,248 +288,52 @@ class AnchorGeneratorSSD(object):
return boxes
@register
@serializable
class ProposalGenerator(object):
    """Generate RoI proposals from RPN scores and box deltas via NMS."""
    __append_doc__ = True

    def __init__(self,
                 train_pre_nms_top_n=12000,
                 train_post_nms_top_n=2000,
                 infer_pre_nms_top_n=6000,
                 infer_post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(ProposalGenerator, self).__init__()
        # Separate top-k budgets for training and inference.
        self.train_pre_nms_top_n = train_pre_nms_top_n
        self.train_post_nms_top_n = train_post_nms_top_n
        self.infer_pre_nms_top_n = infer_pre_nms_top_n
        self.infer_post_nms_top_n = infer_post_nms_top_n
        self.nms_thresh = nms_thresh  # IoU threshold used by proposal NMS
        self.min_size = min_size  # drop proposals smaller than this
        self.eta = eta  # adaptive-NMS decay factor

    def __call__(self,
                 scores,
                 bbox_deltas,
                 anchors,
                 variances,
                 im_shape,
                 is_train=False):
        # Pick the budget matching the current phase.
        pre_nms_top_n = self.train_pre_nms_top_n if is_train else self.infer_pre_nms_top_n
        post_nms_top_n = self.train_post_nms_top_n if is_train else self.infer_post_nms_top_n
        # TODO delete im_info
        if im_shape.shape[1] > 2:
            # Legacy path: more than 2 columns means im_shape actually carries
            # im_info ([h, w, scale]); only the fluid op accepts that layout.
            import paddle.fluid as fluid
            rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals(
                scores,
                bbox_deltas,
                im_shape,
                anchors,
                variances,
                pre_nms_top_n=pre_nms_top_n,
                post_nms_top_n=post_nms_top_n,
                nms_thresh=self.nms_thresh,
                min_size=self.min_size,
                eta=self.eta,
                return_rois_num=True)
        else:
            rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals(
                scores,
                bbox_deltas,
                im_shape,
                anchors,
                variances,
                pre_nms_top_n=pre_nms_top_n,
                post_nms_top_n=post_nms_top_n,
                nms_thresh=self.nms_thresh,
                min_size=self.min_size,
                eta=self.eta,
                return_rois_num=True)
        return rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n
@register
@serializable
class ProposalTargetGenerator(object):
    """Sample proposals and assign classification/regression targets for the RCNN head.

    Per-stage thresholds (lists) support cascade heads; stage > 0 is
    treated as a cascade refinement stage.
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=[.5, ],
                 bg_thresh_hi=[.5, ],
                 bg_thresh_lo=[0., ],
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 use_random=True,
                 is_cls_agnostic=False):
        super(ProposalTargetGenerator, self).__init__()
        self.batch_size_per_im = batch_size_per_im  # RoIs sampled per image
        self.fg_fraction = fg_fraction  # target fraction of foreground RoIs
        self.fg_thresh = fg_thresh  # per-stage foreground IoU thresholds
        self.bg_thresh_hi = bg_thresh_hi  # per-stage background IoU upper bounds
        self.bg_thresh_lo = bg_thresh_lo  # per-stage background IoU lower bounds
        self.bbox_reg_weights = bbox_reg_weights  # delta normalization weights
        self.num_classes = num_classes
        self.use_random = use_random
        self.is_cls_agnostic = is_cls_agnostic

    def __call__(self,
                 rpn_rois,
                 rpn_rois_num,
                 gt_classes,
                 is_crowd,
                 gt_boxes,
                 im_info,
                 stage=0,
                 max_overlap=None):
        # Target generation is numpy-based; convert inputs first
        # (assumes eager/dygraph execution).
        rpn_rois = rpn_rois.numpy()
        rpn_rois_num = rpn_rois_num.numpy()
        gt_classes = gt_classes.numpy()
        gt_boxes = gt_boxes.numpy()
        is_crowd = is_crowd.numpy()
        im_info = im_info.numpy()
        max_overlap = max_overlap if max_overlap is None else max_overlap.numpy(
        )
        # Later cascade stages use tighter regression weights; a cascade
        # stage reduces the head to a 2-way (fg/bg) classification.
        reg_weights = [i / (stage + 1) for i in self.bbox_reg_weights]
        is_cascade = True if stage > 0 else False
        num_classes = 2 if is_cascade else self.num_classes
        outs = generate_proposal_target(
            rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info,
            self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
            self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], reg_weights,
            num_classes, self.use_random, self.is_cls_agnostic, is_cascade,
            max_overlap)
        outs = [to_tensor(v) for v in outs]
        # Targets are constants w.r.t. the network; block gradients.
        for v in outs:
            v.stop_gradient = True
        return outs
@register
@serializable
class MaskTargetGenerator(object):
    """Generate rasterized mask training targets for sampled RoIs.

    Wraps the numpy helper `generate_mask_target`, converting tensors to
    ndarrays on the way in and back to (gradient-blocked) tensors on the
    way out.
    """
    __shared__ = ['num_classes', 'mask_resolution']

    def __init__(self, num_classes=81, mask_resolution=14):
        super(MaskTargetGenerator, self).__init__()
        self.num_classes = num_classes
        self.mask_resolution = mask_resolution  # side length of the target mask grid

    def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_num,
                 labels_int32):
        # Numpy-based target generation (assumes eager/dygraph execution).
        im_info = im_info.numpy()
        gt_classes = gt_classes.numpy()
        is_crowd = is_crowd.numpy()
        gt_segms = gt_segms.numpy()
        rois = rois.numpy()
        rois_num = rois_num.numpy()
        labels_int32 = labels_int32.numpy()
        outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms,
                                    rois, rois_num, labels_int32,
                                    self.num_classes, self.mask_resolution)

        outs = [to_tensor(v) for v in outs]
        # Targets are constants w.r.t. the network; block gradients.
        for v in outs:
            v.stop_gradient = True
        return outs
@register
@serializable
class RCNNBox(object):
__shared__ = ['num_classes', 'batch_size']
def __init__(self,
num_classes=81,
batch_size=1,
prior_box_var=[0.1, 0.1, 0.2, 0.2],
prior_box_var=[10., 10., 5., 5.],
code_type="decode_center_size",
box_normalized=False,
axis=1,
var_weight=1.):
box_normalized=False):
super(RCNNBox, self).__init__()
self.num_classes = num_classes
self.batch_size = batch_size
self.prior_box_var = prior_box_var
self.code_type = code_type
self.box_normalized = box_normalized
self.axis = axis
self.var_weight = var_weight
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
bbox_pred, cls_prob = bbox_head_out
roi, rois_num = rois
origin_shape = im_shape / scale_factor
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
scale_list = []
origin_shape_list = []
for idx in range(self.batch_size):
scale = scale_factor[idx, :][0]
for idx, roi_per_im in enumerate(roi):
rois_num_per_im = rois_num[idx]
expand_scale = paddle.expand(scale, [rois_num_per_im, 1])
scale_list.append(expand_scale)
expand_im_shape = paddle.expand(origin_shape[idx, :],
expand_im_shape = paddle.expand(im_shape[idx, :],
[rois_num_per_im, 2])
origin_shape_list.append(expand_im_shape)
scale = paddle.concat(scale_list)
origin_shape = paddle.concat(origin_shape_list)
bbox = roi / scale
prior_box_var = [i / self.var_weight for i in self.prior_box_var]
bbox = ops.box_coder(
prior_box=bbox,
prior_box_var=prior_box_var,
target_box=bbox_pred,
code_type=self.code_type,
box_normalized=self.box_normalized,
axis=self.axis)
# TODO: Updata box_clip
origin_h = paddle.unsqueeze(origin_shape[:, 0] - 1, axis=1)
origin_w = paddle.unsqueeze(origin_shape[:, 1] - 1, axis=1)
zeros = paddle.zeros(paddle.shape(origin_h), 'float32')
# [N, C*4]
bbox = paddle.concat(roi)
bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
scores = cls_prob[:, :-1]
# [N*C, 4]
bbox_num_class = bbox.shape[1] // 4
bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])
origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
zeros = paddle.zeros_like(origin_h)
x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
bboxes = (bbox, rois_num)
return bboxes, cls_prob
@register
@serializable
class DecodeClipNms(object):
    """Decode box deltas, clip to the image, and apply NMS in one numpy pass.

    Thin wrapper over `bbox_post_process`; inputs are converted to numpy
    and results returned as gradient-blocked tensors.
    """
    __shared__ = ['num_classes']

    def __init__(
            self,
            num_classes=81,
            keep_top_k=100,
            score_threshold=0.05,
            nms_threshold=0.5, ):
        super(DecodeClipNms, self).__init__()
        self.num_classes = num_classes
        self.keep_top_k = keep_top_k  # max detections kept per image after NMS
        self.score_threshold = score_threshold  # drop detections scored below this
        self.nms_threshold = nms_threshold  # IoU threshold for NMS suppression

    def __call__(self, bboxes, bbox_prob, bbox_delta, im_info):
        # Lazily convert each element of the bbox tuple to numpy.
        bboxes_np = (i.numpy() for i in bboxes)
        # bbox, bbox_num
        outs = bbox_post_process(bboxes_np,
                                 bbox_prob.numpy(),
                                 bbox_delta.numpy(),
                                 im_info.numpy(), self.keep_top_k,
                                 self.score_threshold, self.nms_threshold,
                                 self.num_classes)
        outs = [to_tensor(v) for v in outs]
        # Post-processed detections carry no gradient.
        for v in outs:
            v.stop_gradient = True
        return outs
return bboxes, scores
@register
......@@ -589,7 +346,6 @@ class MultiClassNMS(object):
nms_threshold=.5,
normalized=False,
nms_eta=1.0,
background_label=0,
return_rois_num=True):
super(MultiClassNMS, self).__init__()
self.score_threshold = score_threshold
......@@ -598,14 +354,28 @@ class MultiClassNMS(object):
self.nms_threshold = nms_threshold
self.normalized = normalized
self.nms_eta = nms_eta
self.background_label = background_label
self.return_rois_num = return_rois_num
def __call__(self, bboxes, score):
def __call__(self, bboxes, score, background_label=-1):
"""
bboxes (Tensor|List[Tensor]): 1. (Tensor) Predicted bboxes with shape
[N, M, 4], N is the batch size and M
is the number of bboxes
2. (List[Tensor]) bboxes and bbox_num,
bboxes have shape of [M, C, 4], C
is the class number and bbox_num means
the number of bboxes of each batch with
shape [N,]
score (Tensor): Predicted scores with shape [N, C, M] or [M, C]
background_label (int): Ignore the background label; For example, RCNN
is num_classes and YOLO is -1.
"""
kwargs = self.__dict__.copy()
if isinstance(bboxes, tuple):
bboxes, bbox_num = bboxes
kwargs.update({'rois_num': bbox_num})
if background_label > -1:
kwargs.update({'background_label': background_label})
return ops.multiclass_nms(bboxes, score, **kwargs)
......
import numpy as np
from ppdet.core.workspace import register
@register
class Mask(object):
    """Orchestrate mask target generation for Mask R-CNN training.

    Feeds ground-truth info and sampled proposals to the injected
    `mask_target_generator` and caches its outputs on the instance so
    `get_targets()` can retrieve the mask targets later.
    """
    __inject__ = ['mask_target_generator']

    def __init__(self, mask_target_generator):
        super(Mask, self).__init__()
        self.mask_target_generator = mask_target_generator

    def __call__(self, inputs, rois, targets):
        mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois,
                                                                   targets)
        return mask_rois, rois_has_mask_int32

    def generate_mask_target(self, inputs, rois, targets):
        labels_int32 = targets['labels_int32']
        proposals, proposals_num = rois
        # Side effect: caches rois_has_mask_int32 and mask_int32 on self
        # (mask_int32 is exposed via get_targets()).
        mask_rois, mask_rois_num, self.rois_has_mask_int32, self.mask_int32 = self.mask_target_generator(
            im_info=inputs['im_info'],
            gt_classes=inputs['gt_class'],
            is_crowd=inputs['is_crowd'],
            gt_segms=inputs['gt_poly'],
            rois=proposals,
            rois_num=proposals_num,
            labels_int32=labels_int32)
        self.mask_rois = (mask_rois, mask_rois_num)
        return self.mask_rois, self.rois_has_mask_int32

    def get_targets(self):
        # Mask targets produced by the most recent __call__.
        return self.mask_int32
......@@ -21,6 +21,7 @@ from paddle.nn import Conv2D
from paddle.nn.initializer import XavierUniform
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
@register
......@@ -29,18 +30,19 @@ class FPN(Layer):
def __init__(self,
in_channels,
out_channel,
min_level=0,
max_level=4,
spatial_scale=[0.25, 0.125, 0.0625, 0.03125],
spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
has_extra_convs=False,
extra_stage=1,
use_c5=True,
relu_before_extra_convs=True):
super(FPN, self).__init__()
self.min_level = min_level
self.max_level = max_level
self.spatial_scale = spatial_scale
self.out_channel = out_channel
for s in range(extra_stage):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.has_extra_convs = has_extra_convs
self.extra_stage = extra_stage
self.use_c5 = use_c5
self.relu_before_extra_convs = relu_before_extra_convs
......@@ -48,11 +50,7 @@ class FPN(Layer):
self.fpn_convs = []
fan = out_channel * 3 * 3
self.num_backbone_stages = len(spatial_scale)
self.num_outs = self.max_level - self.min_level + 1
self.highest_backbone_level = self.min_level + self.num_backbone_stages - 1
for i in range(self.min_level, self.highest_backbone_level + 1):
for i in range(len(in_channels)):
if i == 3:
lateral_name = 'fpn_inner_res5_sum'
else:
......@@ -65,9 +63,7 @@ class FPN(Layer):
out_channels=out_channel,
kernel_size=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=in_c)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
initializer=XavierUniform(fan_out=in_c))))
self.lateral_convs.append(lateral)
fpn_name = 'fpn_res{}_sum'.format(i + 2)
......@@ -79,17 +75,14 @@ class FPN(Layer):
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(fpn_conv)
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
if self.has_extra_convs and self.num_outs > self.num_backbone_stages:
for lvl in range(self.highest_backbone_level + 1,
self.max_level + 1): # P6 P7 ...
if lvl == self.highest_backbone_level + 1 and self.use_c5:
in_c = in_channels[self.highest_backbone_level]
if self.has_extra_convs:
for lvl in range(self.extra_stage): # P6 P7 ...
if lvl == 0 and self.use_c5:
in_c = in_channels[-1]
else:
in_c = out_channel
extra_fpn_name = 'fpn_{}'.format(lvl + 2)
......@@ -102,51 +95,60 @@ class FPN(Layer):
stride=2,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(extra_fpn_conv)
@classmethod
def from_config(cls, cfg, input_shape):
return {
'in_channels': [i.channels for i in input_shape],
'spatial_scales': [1.0 / i.stride for i in input_shape],
}
def forward(self, body_feats):
laterals = []
used_backbone_levels = len(self.spatial_scale)
for i in range(used_backbone_levels):
num_levels = len(body_feats)
for i in range(num_levels):
laterals.append(self.lateral_convs[i](body_feats[i]))
used_backbone_levels = len(self.spatial_scale)
for i in range(used_backbone_levels - 1):
idx = used_backbone_levels - 1 - i
for i in range(1, num_levels):
lvl = num_levels - i
upsample = F.interpolate(
laterals[idx],
laterals[lvl],
scale_factor=2.,
mode='nearest', )
laterals[idx - 1] += upsample
laterals[lvl - 1] += upsample
fpn_output = []
for lvl in range(self.min_level, self.highest_backbone_level + 1):
i = lvl - self.min_level
fpn_output.append(self.fpn_convs[i](laterals[i]))
for lvl in range(num_levels):
fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
spatial_scales = self.spatial_scale
if self.num_outs > len(fpn_output):
if self.extra_stage > 0:
# use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
if not self.has_extra_convs:
assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5]
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
else:
if self.use_c5:
extra_source = body_feats[-1]
else:
extra_source = fpn_output[-1]
fpn_output.append(self.fpn_convs[used_backbone_levels](
extra_source))
spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5]
for i in range(used_backbone_levels + 1, self.num_outs):
fpn_output.append(self.fpn_convs[num_levels](extra_source))
for i in range(1, self.extra_stage):
if self.relu_before_extra_convs:
fpn_output.append(self.fpn_convs[i](F.relu(fpn_output[
-1])))
fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
fpn_output[-1])))
else:
fpn_output.append(self.fpn_convs[i](fpn_output[-1]))
spatial_scales = spatial_scales + [spatial_scales[-1] * 0.5]
return fpn_output, spatial_scales
fpn_output.append(self.fpn_convs[num_levels + i](
fpn_output[-1]))
return fpn_output
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channel, stride=1. / s)
for s in self.spatial_scales
]
......@@ -32,7 +32,6 @@ __all__ = [
'roi_pool',
'roi_align',
'prior_box',
'anchor_generator',
'generate_proposals',
'iou_similarity',
'box_coder',
......@@ -169,6 +168,7 @@ def roi_align(input,
spatial_scale=1.0,
sampling_ratio=-1,
rois_num=None,
aligned=True,
name=None):
"""
......@@ -239,7 +239,7 @@ def roi_align(input,
align_out = core.ops.roi_align(
input, rois, rois_num, "pooled_height", pooled_height,
"pooled_width", pooled_width, "spatial_scale", spatial_scale,
"sampling_ratio", sampling_ratio)
"sampling_ratio", sampling_ratio) #, "aligned", aligned)
return align_out
else:
......@@ -264,7 +264,8 @@ def roi_align(input,
"pooled_height": pooled_height,
"pooled_width": pooled_width,
"spatial_scale": spatial_scale,
"sampling_ratio": sampling_ratio
"sampling_ratio": sampling_ratio,
#"aligned": aligned,
})
return align_out
......@@ -846,117 +847,6 @@ def prior_box(input,
return box, var
@paddle.jit.not_to_static
def anchor_generator(input,
                     anchor_sizes=None,
                     aspect_ratios=None,
                     variance=[0.1, 0.1, 0.2, 0.2],
                     stride=None,
                     offset=0.5,
                     name=None):
    """
    This op generate anchors for Faster RCNN algorithm.
    Each position of the input produce N anchors, N =
    size(anchor_sizes) * size(aspect_ratios). The order of generated anchors
    is firstly aspect_ratios loop then anchor_sizes loop.
    Args:
       input(Tensor): 4-D Tensor with shape [N,C,H,W]. The input feature map.
       anchor_sizes(float32|list|tuple, optional): The anchor sizes of generated
          anchors, given in absolute pixels e.g. [64., 128., 256., 512.].
          For instance, the anchor size of 64 means the area of this anchor
          equals to 64**2. None by default.
       aspect_ratios(float32|list|tuple, optional): The height / width ratios
           of generated anchors, e.g. [0.5, 1.0, 2.0]. None by default.
       variance(list|tuple, optional): The variances to be used in box
           regression deltas. The data type is float32, [0.1, 0.1, 0.2, 0.2] by
           default.
       stride(list|tuple, optional): The anchors stride across width and height.
           The data type is float32. e.g. [16.0, 16.0]. None by default.
       offset(float32, optional): Prior boxes center offset. 0.5 by default.
       name(str, optional): For detailed information, please refer
           to :ref:`api_guide_Name`. Usually name is no need to set and None
           by default.
    Returns:
        Tuple:
        Anchors(Tensor): The output anchors with a layout of [H, W, num_anchors, 4].
        H is the height of input, W is the width of input,
        num_anchors is the box count of each position.
        Each anchor is in (xmin, ymin, xmax, ymax) format an unnormalized.
        Variances(Tensor): The expanded variances of anchors
        with a layout of [H, W, num_priors, 4].
        H is the height of input, W is the width of input
        num_anchors is the box count of each position.
        Each variance is in (xcenter, ycenter, w, h) format.
    Examples:
        .. code-block:: python
            import paddle
            from ppdet.modeling import ops
            paddle.enable_static()
            conv1 = paddle.static.data(name='input', shape=[None, 48, 16, 16], dtype='float32')
            anchor, var = ops.anchor_generator(
                input=conv1,
                anchor_sizes=[64, 128, 256, 512],
                aspect_ratios=[0.5, 1.0, 2.0],
                variance=[0.1, 0.1, 0.2, 0.2],
                stride=[16.0, 16.0],
                offset=0.5)
    """
    # NOTE(review): this op was dropped from __all__ in this refactor; the
    # dynamic-graph AnchorGenerator class appears to replace it — confirm
    # before relying on it from new code.
    helper = LayerHelper("anchor_generator", **locals())
    dtype = helper.input_dtype()
    def _is_list_or_tuple_(data):
        return (isinstance(data, list) or isinstance(data, tuple))
    # Normalize scalar arguments to lists so the op attrs are list-typed.
    if not _is_list_or_tuple_(anchor_sizes):
        anchor_sizes = [anchor_sizes]
    if not _is_list_or_tuple_(aspect_ratios):
        aspect_ratios = [aspect_ratios]
    if not (_is_list_or_tuple_(stride) and len(stride) == 2):
        raise ValueError('stride should be a list or tuple ',
                         'with length 2, (stride_width, stride_height).')
    anchor_sizes = list(map(float, anchor_sizes))
    aspect_ratios = list(map(float, aspect_ratios))
    stride = list(map(float, stride))
    if in_dygraph_mode():
        # Dygraph path: call the C++ op directly with flattened attrs.
        attrs = ('anchor_sizes', anchor_sizes, 'aspect_ratios', aspect_ratios,
                 'variances', variance, 'stride', stride, 'offset', offset)
        anchor, var = core.ops.anchor_generator(input, *attrs)
        return anchor, var
    else:
        # Static-graph path: append the op to the program.
        attrs = {
            'anchor_sizes': anchor_sizes,
            'aspect_ratios': aspect_ratios,
            'variances': variance,
            'stride': stride,
            'offset': offset
        }
        anchor = helper.create_variable_for_type_inference(dtype)
        var = helper.create_variable_for_type_inference(dtype)
        helper.append_op(
            type="anchor_generator",
            inputs={"Input": input},
            outputs={"Anchors": anchor,
                     "Variances": var},
            attrs=attrs, )
        # Anchors are fixed geometry; never back-propagate through them.
        anchor.stop_gradient = True
        var.stop_gradient = True
        return anchor, var
@paddle.jit.not_to_static
def multiclass_nms(bboxes,
scores,
......@@ -966,7 +856,7 @@ def multiclass_nms(bboxes,
nms_threshold=0.3,
normalized=True,
nms_eta=1.,
background_label=0,
background_label=-1,
return_index=False,
return_rois_num=True,
rois_num=None,
......
......@@ -3,47 +3,140 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.py_op.post_process import mask_post_process
from ppdet.modeling.bbox_utils import nonempty_bbox
from . import ops
@register
class BBoxPostProcess(object):
__shared__ = ['num_classes']
__inject__ = ['decode', 'nms']
def __init__(self, decode=None, nms=None):
def __init__(self, num_classes=80, decode=None, nms=None):
super(BBoxPostProcess, self).__init__()
self.num_classes = num_classes
self.decode = decode
self.nms = nms
def __call__(self, head_out, rois, im_shape, scale_factor=None):
def __call__(self, head_out, rois, im_shape, scale_factor):
"""
Decode the bbox and do NMS if needed.
Returns:
bbox_pred(Tensor): The output is the prediction with shape [N, 6]
including labels, scores and bboxes. The size of
bboxes are corresponding to the input image and
the bboxes may be used in other brunch.
bbox_num(Tensor): The number of prediction of each batch with shape
[N, 6].
"""
if self.nms is not None:
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
bbox_pred, bbox_num, _ = self.nms(bboxes, score)
bbox_pred, bbox_num, _ = self.nms(bboxes, score, self.num_classes)
else:
bbox_pred, bbox_num = self.decode(head_out, rois, im_shape,
scale_factor)
return bbox_pred, bbox_num
    def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
        """
        Rescale, clip and filter the bbox from the output of NMS to
        get final prediction.
        Args:
            bboxes(Tensor): The output of __call__ with shape [N, 6]
            bbox_num(Tensor): Number of detections per image, shape [B,].
            im_shape(Tensor): Network input (resized) shape per image.
            scale_factor(Tensor): Per-image [scale_y, scale_x].
        Returns:
            bbox_pred(Tensor): The output is the prediction with shape [N, 6]
                               including labels, scores and bboxes. The size of
                               bboxes are corresponding to the original image.
        """
        # NOTE(review): assert is stripped under `python -O`; consider raising.
        assert bboxes.shape[0] > 0, 'There is no detection output'
        # Recover the original (pre-resize) image shape.
        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
        origin_shape_list = []
        scale_factor_list = []
        # scale_factor: scale_y, scale_x
        # Expand per-image shape/scale to one row per detection box.
        for i in range(bbox_num.shape[0]):
            expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                         [bbox_num[i], 2])
            scale_y, scale_x = scale_factor[i]
            scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
            expand_scale = paddle.expand(scale, [bbox_num[i], 4])
            origin_shape_list.append(expand_shape)
            scale_factor_list.append(expand_scale)
        # Cached for get_origin_shape() (used by the mask branch).
        self.origin_shape_list = paddle.concat(origin_shape_list)
        scale_factor_list = paddle.concat(scale_factor_list)
        # bboxes: [N, 6], label, score, bbox
        pred_label = bboxes[:, 0:1]
        pred_score = bboxes[:, 1:2]
        pred_bbox = bboxes[:, 2:]
        # rescale bbox to original image
        scaled_bbox = pred_bbox / scale_factor_list
        origin_h = self.origin_shape_list[:, 0]
        origin_w = self.origin_shape_list[:, 1]
        zeros = paddle.zeros_like(origin_h)
        # clip bbox to [0, original_size]
        x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros)
        y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros)
        x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros)
        y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros)
        pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
        # filter empty bbox: mark degenerate boxes with label -1 instead of
        # removing rows, so N stays aligned with scores/masks.
        keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
        keep_mask = paddle.unsqueeze(keep_mask, [1])
        pred_label = paddle.where(keep_mask, pred_label,
                                  paddle.ones_like(pred_label) * -1)
        pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
        return pred_result
    def get_origin_shape(self, ):
        # Per-detection original image shapes cached by get_pred; only valid
        # after get_pred has been called.
        return self.origin_shape_list
@register
class MaskPostProcess(object):
__shared__ = ['mask_resolution']
def __init__(self, mask_resolution=28, binary_thresh=0.5):
def __init__(self, binary_thresh=0.5):
super(MaskPostProcess, self).__init__()
self.mask_resolution = mask_resolution
self.binary_thresh = binary_thresh
def __call__(self, bboxes, mask_head_out, im_shape, scale_factor=None):
# TODO: modify related ops for deploying
bboxes_np = (i.numpy() for i in bboxes)
mask = mask_post_process(bboxes_np,
mask_head_out.numpy(),
im_shape.numpy(), scale_factor[:, 0].numpy(),
self.mask_resolution, self.binary_thresh)
mask = {'mask': mask}
return mask
    def paste_mask(self, masks, boxes, im_h, im_w):
        """Paste fixed-resolution mask predictions into an (im_h, im_w) canvas.

        NOTE(review): `masks` looks like a single [H, W] mask (unsqueezed to
        [1, 1, H, W]) sampled over N boxes — confirm grid_sample batch
        broadcasting against the caller, which passes mask_out[i] and one box.
        """
        # paste each mask on image
        x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
        masks = paddle.unsqueeze(masks, [0, 1])
        # Pixel centers of the target image.
        img_y = paddle.arange(0, im_h, dtype='float32') + 0.5
        img_x = paddle.arange(0, im_w, dtype='float32') + 0.5
        # Map pixel centers to box-relative [-1, 1], the range grid_sample
        # expects; pixels outside the box fall outside [-1, 1] and sample 0.
        img_y = (img_y - y0) / (y1 - y0) * 2 - 1
        img_x = (img_x - x0) / (x1 - x0) * 2 - 1
        img_x = paddle.unsqueeze(img_x, [1])
        img_y = paddle.unsqueeze(img_y, [2])
        N = boxes.shape[0]
        gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]])
        gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]])
        grid = paddle.stack([gx, gy], axis=3)
        img_masks = F.grid_sample(masks, grid, align_corners=False)
        # Drop the channel dim: [N, H, W].
        return img_masks[:, 0]
    def __call__(self, mask_out, bboxes, bbox_num, origin_shape):
        """
        Paste the mask prediction to the original image.
        """
        # NOTE(review): assert is stripped under `python -O`.
        assert bboxes.shape[0] > 0, 'There is no detection output'
        num_mask = mask_out.shape[0]
        # TODO: support bs > 1
        # The canvas is sized from the FIRST image's original shape only.
        pred_result = paddle.zeros(
            [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='bool')
        # TODO: optimize chunk paste
        for i in range(bboxes.shape[0]):
            im_h, im_w = origin_shape[i]
            # bboxes[:, 2:] holds the box coordinates (cols 0-1 are
            # label/score).
            pred_mask = self.paste_mask(mask_out[i], bboxes[i:i + 1, 2:], im_h,
                                        im_w)
            # Binarize the soft mask.
            pred_mask = pred_mask >= self.binary_thresh
            pred_result[i] = pred_mask
        return pred_result
@register
......
from . import rpn_head
from .rpn_head import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from .. import ops
@register
class AnchorGenerator(object):
    """Generates anchors over multiple feature maps.

    For each feature level a set of base ("cell") anchors is precomputed
    from ``anchor_sizes`` x ``aspect_ratios``; at call time those base
    anchors are shifted to every spatial location of the level.
    """

    def __init__(self,
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1.0, 2.0],
                 strides=[16.0],
                 variance=[1.0, 1.0, 1.0, 1.0],
                 offset=0.):
        super(AnchorGenerator, self).__init__()
        self.anchor_sizes = anchor_sizes
        self.aspect_ratios = aspect_ratios
        self.strides = strides
        self.variance = variance
        # Base anchors are fixed geometry; compute them once up front.
        self.cell_anchors = self._calculate_anchors(len(strides))
        self.offset = offset

    def _broadcast_params(self, params, num_features):
        # A flat list applies to every level; a single nested entry is
        # repeated; otherwise the caller supplied one entry per level.
        if not isinstance(params[0], (list, tuple)):  # list[float]
            return [params] * num_features
        if len(params) == 1:
            return list(params) * num_features
        return params

    def generate_cell_anchors(self, sizes, aspect_ratios):
        # Base anchors centered at the origin, one per (size, ratio) pair.
        cell = []
        for anchor_size in sizes:
            area = anchor_size**2.0
            for ratio in aspect_ratios:
                w = math.sqrt(area / ratio)
                h = ratio * w
                cell.append([-w / 2.0, -h / 2.0, w / 2.0, h / 2.0])
        return paddle.to_tensor(cell, dtype='float32')

    def _calculate_anchors(self, num_features):
        sizes = self._broadcast_params(self.anchor_sizes, num_features)
        ratios = self._broadcast_params(self.aspect_ratios, num_features)
        return [
            self.generate_cell_anchors(s, r) for s, r in zip(sizes, ratios)
        ]

    def _create_grid_offsets(self, size, stride, offset):
        # Flattened x/y pixel offsets of every cell center on this level.
        grid_height, grid_width = size
        shifts_x = paddle.arange(
            offset * stride, grid_width * stride, step=stride, dtype='float32')
        shifts_y = paddle.arange(
            offset * stride, grid_height * stride, step=stride,
            dtype='float32')
        shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x)
        return shift_x.reshape([-1]), shift_y.reshape([-1])

    def _grid_anchors(self, grid_sizes):
        all_anchors = []
        for size, stride, base in zip(grid_sizes, self.strides,
                                      self.cell_anchors):
            sx, sy = self._create_grid_offsets(size, stride, self.offset)
            shifts = paddle.stack((sx, sy, sx, sy), axis=1)
            # [num_cells, 1, 4] + [1, num_base, 4] -> flattened [A, 4].
            shifted = shifts.reshape([-1, 1, 4]) + base.reshape([1, -1, 4])
            all_anchors.append(shifted.reshape([-1, 4]))
        return all_anchors

    def __call__(self, input):
        """Return one [A_l, 4] anchor tensor per input feature map."""
        grid_sizes = [feature_map.shape[-2:] for feature_map in input]
        return self._grid_anchors(grid_sizes)

    @property
    def num_anchors(self):
        """
        Returns:
            int: number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios and 5 sizes, the number of anchors is 15.
                For FPN models, `num_anchors` on every feature map is the same.
        """
        return self.cell_anchors[0].shape[0]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from .. import ops
@register
@serializable
class ProposalGenerator(object):
    """Turns RPN scores/deltas plus anchors into scored proposals.

    Thin wrapper around ``ops.generate_proposals`` that holds the NMS
    hyper-parameters. With ``topk_after_collect`` set, each level keeps
    ``pre_nms_top_n`` proposals and the final top-k selection is deferred
    to the caller (the FPN multi-level collect step).
    """

    def __init__(self,
                 pre_nms_top_n=12000,
                 post_nms_top_n=2000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.,
                 topk_after_collect=False):
        super(ProposalGenerator, self).__init__()
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.min_size = min_size
        self.eta = eta
        self.topk_after_collect = topk_after_collect

    def __call__(self, scores, bbox_deltas, anchors, im_shape):
        """Return (rois, roi_probs, roi_nums, post_nms_top_n)."""
        # Keep more proposals per level when the caller re-ranks across
        # levels afterwards.
        if self.topk_after_collect:
            keep_n = self.pre_nms_top_n
        else:
            keep_n = self.post_nms_top_n
        variances = paddle.ones_like(anchors)
        rois, roi_probs, roi_nums = ops.generate_proposals(
            scores,
            bbox_deltas,
            im_shape,
            anchors,
            variances,
            pre_nms_top_n=self.pre_nms_top_n,
            post_nms_top_n=keep_n,
            nms_thresh=self.nms_thresh,
            min_size=self.min_size,
            eta=self.eta,
            return_rois_num=True)
        return rois, roi_probs, roi_nums, self.post_nms_top_n
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling import ops
from .anchor_generator import AnchorGenerator
from .target_layer import RPNTargetAssign
from .proposal_generator import ProposalGenerator
class RPNFeat(nn.Layer):
    """Shared 3x3 conv + ReLU applied to every feature level before the
    RPN score/delta predictors."""

    def __init__(self, feat_in=1024, feat_out=1024):
        super(RPNFeat, self).__init__()
        # rpn feat is shared with each level
        self.rpn_conv = nn.Conv2D(
            in_channels=feat_in,
            out_channels=feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0., std=0.01)))

    def forward(self, feats):
        """Apply the shared conv to every level and return the list."""
        return [F.relu(self.rpn_conv(level_feat)) for level_feat in feats]
@register
class RPNHead(nn.Layer):
    """Region Proposal Network head.

    Shares one RPNFeat conv and a pair of 1x1 score/delta predictors across
    all input feature levels, generates per-image proposals from the
    predictions, and computes the RPN losses during training.
    """
    def __init__(self,
                 anchor_generator=AnchorGenerator().__dict__,
                 rpn_target_assign=RPNTargetAssign().__dict__,
                 train_proposal=ProposalGenerator(12000, 2000).__dict__,
                 test_proposal=ProposalGenerator().__dict__,
                 in_channel=1024):
        super(RPNHead, self).__init__()
        self.anchor_generator = anchor_generator
        self.rpn_target_assign = rpn_target_assign
        self.train_proposal = train_proposal
        self.test_proposal = test_proposal
        # Config entries arrive as plain dicts; hydrate them into objects.
        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGenerator(**anchor_generator)
        if isinstance(rpn_target_assign, dict):
            self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign)
        if isinstance(train_proposal, dict):
            self.train_proposal = ProposalGenerator(**train_proposal)
        if isinstance(test_proposal, dict):
            self.test_proposal = ProposalGenerator(**test_proposal)
        num_anchors = self.anchor_generator.num_anchors
        self.rpn_feat = RPNFeat(in_channel, in_channel)
        # rpn head is shared with each level
        # rpn roi classification scores (one logit per anchor)
        self.rpn_rois_score = nn.Conv2D(
            in_channels=in_channel,
            out_channels=num_anchors,
            kernel_size=1,
            padding=0,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0., std=0.01)))
        # rpn roi bbox regression deltas (4 per anchor)
        self.rpn_rois_delta = nn.Conv2D(
            in_channels=in_channel,
            out_channels=4 * num_anchors,
            kernel_size=1,
            padding=0,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0., std=0.01)))
    @classmethod
    def from_config(cls, cfg, input_shape):
        # FPN share same rpn head
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channel': input_shape.channels}
    def forward(self, feats, inputs):
        """Return (rois, rois_num, loss); loss is None at eval time."""
        rpn_feats = self.rpn_feat(feats)
        scores = []
        deltas = []
        for rpn_feat in rpn_feats:
            rrs = self.rpn_rois_score(rpn_feat)
            rrd = self.rpn_rois_delta(rpn_feat)
            scores.append(rrs)
            deltas.append(rrd)
        anchors = self.anchor_generator(rpn_feats)
        rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs)
        if self.training:
            loss = self.get_loss(scores, deltas, anchors, inputs)
            return rois, rois_num, loss
        else:
            return rois, rois_num, None
    def _gen_proposal(self, scores, bbox_deltas, anchors, inputs):
        """
        scores (list[Tensor]): Multi-level scores prediction
        bbox_deltas (list[Tensor]): Multi-level deltas prediction
        anchors (list[Tensor]): Multi-level anchors
        inputs (dict): ground truth info
        """
        prop_gen = self.train_proposal if self.training else self.test_proposal
        im_shape = inputs['im_shape']
        batch_size = im_shape.shape[0]
        rpn_rois_list = [[] for i in range(batch_size)]
        rpn_prob_list = [[] for i in range(batch_size)]
        rpn_rois_num_list = [[] for i in range(batch_size)]
        # Generate proposals for each level and each batch.
        # Discard batch-computing to avoid sorting bbox cross different batches.
        for rpn_score, rpn_delta, anchor in zip(scores, bbox_deltas, anchors):
            for i in range(batch_size):
                rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = prop_gen(
                    scores=rpn_score[i:i + 1],
                    bbox_deltas=rpn_delta[i:i + 1],
                    anchors=anchor,
                    im_shape=im_shape[i:i + 1])
                if rpn_rois.shape[0] > 0:
                    rpn_rois_list[i].append(rpn_rois)
                    rpn_prob_list[i].append(rpn_rois_prob)
                    rpn_rois_num_list[i].append(rpn_rois_num)
        # Collect multi-level proposals for each batch
        # Get 'topk' of them as final output
        # NOTE(review): if an image produced no proposals at any level,
        # rpn_rois_list[i] is empty and the single-level branch below raises
        # IndexError; `post_nms_top_n` also relies on the loop above having
        # run at least once — confirm upstream guarantees.
        rois_collect = []
        rois_num_collect = []
        for i in range(batch_size):
            if len(scores) > 1:
                rpn_rois = paddle.concat(rpn_rois_list[i])
                rpn_prob = paddle.concat(rpn_prob_list[i]).flatten()
                if rpn_prob.shape[0] > post_nms_top_n:
                    topk_prob, topk_inds = paddle.topk(rpn_prob, post_nms_top_n)
                    topk_rois = paddle.gather(rpn_rois, topk_inds)
                else:
                    topk_rois = rpn_rois
                    topk_prob = rpn_prob
            else:
                topk_rois = rpn_rois_list[i][0]
                topk_prob = rpn_prob_list[i][0].flatten()
            rois_collect.append(topk_rois)
            rois_num_collect.append(paddle.shape(topk_rois)[0])
        rois_num_collect = paddle.concat(rois_num_collect)
        return rois_collect, rois_num_collect
    def get_loss(self, pred_scores, pred_deltas, anchors, inputs):
        """
        pred_scores (list[Tensor]): Multi-level scores prediction
        pred_deltas (list[Tensor]): Multi-level deltas prediction
        anchors (list[Tensor]): Multi-level anchors
        inputs (dict): ground truth info, including im, gt_bbox, gt_score
        """
        # Flatten anchors/predictions across levels so they align 1:1 with
        # the target assigner's output.
        anchors = [paddle.reshape(a, shape=(-1, 4)) for a in anchors]
        anchors = paddle.concat(anchors)
        scores = [
            paddle.reshape(
                paddle.transpose(
                    v, perm=[0, 2, 3, 1]),
                shape=(v.shape[0], -1, 1)) for v in pred_scores
        ]
        scores = paddle.concat(scores, axis=1)
        deltas = [
            paddle.reshape(
                paddle.transpose(
                    v, perm=[0, 2, 3, 1]),
                shape=(v.shape[0], -1, 4)) for v in pred_deltas
        ]
        deltas = paddle.concat(deltas, axis=1)
        score_tgt, bbox_tgt, loc_tgt, norm = self.rpn_target_assign(inputs,
                                                                    anchors)
        scores = paddle.reshape(x=scores, shape=(-1, ))
        deltas = paddle.reshape(x=deltas, shape=(-1, 4))
        score_tgt = paddle.concat(score_tgt)
        score_tgt.stop_gradient = True
        # Labels: 1 = positive, 0 = negative, -1 = ignored.
        pos_mask = score_tgt == 1
        pos_ind = paddle.nonzero(pos_mask)
        valid_mask = score_tgt >= 0
        valid_ind = paddle.nonzero(valid_mask)
        # cls loss: sum-reduced BCE over sampled (non-ignored) anchors
        score_pred = paddle.gather(scores, valid_ind)
        score_label = paddle.gather(score_tgt, valid_ind).cast('float32')
        score_label.stop_gradient = True
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            logit=score_pred, label=score_label, reduction="sum")
        # reg loss: L1 over positive anchors only
        loc_pred = paddle.gather(deltas, pos_ind)
        loc_tgt = paddle.concat(loc_tgt)
        loc_tgt = paddle.gather(loc_tgt, pos_ind)
        loc_tgt.stop_gradient = True
        loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
        # Both losses are normalized by the assigner-provided `norm`.
        return {
            'loss_rpn_cls': loss_rpn_cls / norm,
            'loss_rpn_reg': loss_rpn_reg / norm
        }
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import math
import numpy as np
import paddle
from ..bbox_utils import bbox2delta, bbox_overlaps
import copy
def rpn_anchor_target(anchors,
                      gt_boxes,
                      rpn_batch_size_per_im,
                      rpn_positive_overlap,
                      rpn_negative_overlap,
                      rpn_fg_fraction,
                      use_random=True,
                      batch_size=1,
                      weights=[1., 1., 1., 1.]):
    """Assign RPN classification/regression targets per image.

    Args:
        anchors: [A, 4] anchors shared by all images.
        gt_boxes: per-image list of gt boxes.
        rpn_batch_size_per_im (int): anchors sampled per image.
        rpn_positive_overlap / rpn_negative_overlap (float): IoU thresholds.
        rpn_fg_fraction (float): target fraction of foreground samples.
        use_random (bool): random vs. deterministic sampling.
        batch_size (int): number of images.
        weights (list): bbox2delta weights.
    Returns:
        (tgt_labels, tgt_bboxes, tgt_deltas): per-image lists; labels are
        1 fg / 0 bg / -1 ignored.
    """
    tgt_labels = []
    tgt_bboxes = []
    tgt_deltas = []
    for i in range(batch_size):
        gt_bbox = gt_boxes[i]
        # Step1: match anchor and gt_bbox
        # (matched_vals is unused here; kept for the 3-way unpack.)
        matches, match_labels, matched_vals = label_box(
            anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True)
        # Step2: sample anchor
        fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
                                            rpn_fg_fraction, 0, use_random)
        # Fill with the ignore label (-1), then set positive and negative labels
        labels = paddle.full(match_labels.shape, -1, dtype='int32')
        labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
        labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
        # Step3: make output
        matched_gt_boxes = paddle.gather(gt_bbox, matches)
        tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
        # Targets never receive gradients.
        labels.stop_gradient = True
        matched_gt_boxes.stop_gradient = True
        tgt_delta.stop_gradient = True
        tgt_labels.append(labels)
        tgt_bboxes.append(matched_gt_boxes)
        tgt_deltas.append(tgt_delta)
    return tgt_labels, tgt_bboxes, tgt_deltas
def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
              allow_low_quality):
    """Match every anchor to its best-overlapping gt box.

    Args:
        anchors: [A, 4] candidate boxes.
        gt_boxes: [G, 4] ground-truth boxes.
        positive_overlap (float): IoU at or above which an anchor is fg (1).
        negative_overlap (float): IoU below which an anchor is bg (0);
            in between it is ignored (-1).
        allow_low_quality (bool): also mark, for each gt, the anchor(s) with
            the highest IoU as fg even below positive_overlap.
    Returns:
        matches: [A,] index of the matched gt for each anchor.
        match_labels: [A,] 1 fg / 0 bg / -1 ignore.
        matched_vals: [A,] IoU of each anchor with its matched gt.
    """
    iou = bbox_overlaps(gt_boxes, anchors)
    if iou.numel() == 0:
        # BUG FIX: this branch used to return only two values while both
        # callers unpack three (matches, labels, vals), raising ValueError
        # whenever an image has no gt boxes. Return zero IoUs as well.
        default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
        default_match_labels = paddle.full((iou.shape[1], ), -1, dtype='int32')
        default_matched_vals = paddle.zeros((iou.shape[1], ), dtype='float32')
        return default_matches, default_match_labels, default_matched_vals
    # Best gt per anchor (iou is [G, A], reduce over gt axis).
    matched_vals, matches = paddle.topk(iou, k=1, axis=0)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    match_labels = paddle.where(matched_vals < negative_overlap,
                                paddle.zeros_like(match_labels), match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels), match_labels)
    if allow_low_quality:
        # For each gt, promote the anchor(s) with the highest IoU to fg so
        # every gt has at least one positive anchor.
        highest_quality_foreach_gt = iou.max(axis=1, keepdim=True)
        pred_inds_with_highest_quality = (
            iou == highest_quality_foreach_gt).cast('int32').sum(0,
                                                                 keepdim=True)
        match_labels = paddle.where(pred_inds_with_highest_quality > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)
    matches = matches.flatten()
    match_labels = match_labels.flatten()
    matched_vals = matched_vals.flatten()
    return matches, match_labels, matched_vals
def subsample_labels(labels,
                     num_samples,
                     fg_fraction,
                     bg_label=0,
                     use_random=True):
    """Sample up to num_samples fg/bg indices from an assignment.

    Args:
        labels: [A,] labels where -1 = ignore, bg_label = background and
            anything else is foreground.
        num_samples (int): total samples to draw.
        fg_fraction (float): desired fraction of foreground samples;
            the remainder is filled with background.
        bg_label (int): label value treated as background.
        use_random (bool): draw randomly (training) or take a deterministic
            prefix (debugging/deterministic runs).
    Returns:
        (fg_inds, bg_inds): int32 index tensors into `labels`.
    """
    positive = paddle.nonzero(
        paddle.logical_and(labels != -1, labels != bg_label))
    negative = paddle.nonzero(labels == bg_label)
    positive = positive.cast('int32').flatten()
    negative = negative.cast('int32').flatten()
    fg_num = int(num_samples * fg_fraction)
    fg_num = min(positive.numel(), fg_num)
    # Backfill with background when there are too few positives.
    bg_num = num_samples - fg_num
    bg_num = min(negative.numel(), bg_num)
    # randomly select positive and negative examples
    if use_random:
        # PERF FIX: the permutations were previously computed even when
        # use_random was False, wasting work and consuming RNG state in
        # deterministic runs. Only generate them when actually sampling.
        fg_perm = paddle.randperm(positive.numel(), dtype='int32')
        fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num])
        bg_perm = paddle.randperm(negative.numel(), dtype='int32')
        bg_perm = paddle.slice(bg_perm, axes=[0], starts=[0], ends=[bg_num])
        fg_inds = paddle.gather(positive, fg_perm)
        bg_inds = paddle.gather(negative, bg_perm)
    else:
        fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num])
        bg_inds = paddle.slice(negative, axes=[0], starts=[0], ends=[bg_num])
    return fg_inds, bg_inds
def filter_roi(rois, max_overlap):
    """Keep RoIs with positive width/height and max_overlap < 1 (cascade).

    Args:
        rois: [R, 4] boxes in (x1, y1, x2, y2).
        max_overlap: [R,] best IoU of each RoI from the previous cascade
            stage.
    Returns:
        Tensor: the surviving RoIs, or a single all-zero box when none
        survive (keeps downstream concat/gather shapes valid).
    """
    ws = rois[:, 2] - rois[:, 0]
    hs = rois[:, 3] - rois[:, 1]
    # BUG FIX: paddle.logical_and is binary — the third positional argument
    # used to be swallowed as its `out` parameter, silently dropping the
    # max_overlap condition. Nest the two conjunctions instead.
    valid_mask = paddle.logical_and(
        paddle.logical_and(ws > 0, hs > 0), max_overlap < 1)
    keep = paddle.nonzero(valid_mask)
    if keep.numel() > 0:
        # BUG FIX: nonzero on a 1-D mask yields shape [K, 1]; the indices
        # live in column 0 (column 1 does not exist).
        return paddle.gather(rois, keep[:, 0])
    return paddle.zeros((1, 4), dtype='float32')
def generate_proposal_target(rpn_rois,
                             gt_classes,
                             gt_boxes,
                             batch_size_per_im,
                             fg_fraction,
                             fg_thresh,
                             bg_thresh,
                             num_classes,
                             use_random=True,
                             is_cascade_rcnn=False,
                             max_overlaps=None):
    """Label, sample and gather per-image RoI targets for the bbox head.

    Args:
        rpn_rois (list[Tensor]): proposals per image.
        gt_classes (list[Tensor]): gt class ids per image.
        gt_boxes (list[Tensor]): gt boxes per image.
        batch_size_per_im / fg_fraction / fg_thresh / bg_thresh: sampling
            configuration forwarded to label_box / sample_bbox.
        is_cascade_rcnn: when True, filter RoIs by `max_overlaps` first.

    Returns:
        (rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num,
         sampled_max_overlaps)
    """
    rois_with_gt = []
    tgt_labels = []
    tgt_bboxes = []
    sampled_max_overlaps = []
    tgt_gt_inds = []
    new_rois_num = []
    for i, rpn_roi in enumerate(rpn_rois):
        max_overlap = max_overlaps[i] if is_cascade_rcnn else None
        gt_bbox = gt_boxes[i]
        # Bug fix: use a per-image local instead of rebinding the
        # `gt_classes` argument -- the old `gt_classes = gt_classes[i]`
        # clobbered the list, so every image after the first indexed into
        # the wrong tensor.
        gt_class = gt_classes[i]
        if is_cascade_rcnn:
            rpn_roi = filter_roi(rpn_roi, max_overlap)
        # Append gt boxes so every gt is guaranteed a foreground RoI.
        bbox = paddle.concat([rpn_roi, gt_bbox])
        # Step1: label bbox
        matches, match_labels, matched_vals = label_box(
            bbox, gt_bbox, fg_thresh, bg_thresh, False)
        # Step2: sample bbox
        sampled_inds, sampled_gt_classes = sample_bbox(
            matches, match_labels, gt_class, batch_size_per_im, fg_fraction,
            num_classes, use_random)
        # Step3: make output
        rois_per_image = paddle.gather(bbox, sampled_inds)
        sampled_gt_ind = paddle.gather(matches, sampled_inds)
        sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
        sampled_overlap = paddle.gather(matched_vals, sampled_inds)
        # Targets do not receive gradient.
        rois_per_image.stop_gradient = True
        sampled_gt_ind.stop_gradient = True
        sampled_bbox.stop_gradient = True
        sampled_overlap.stop_gradient = True
        tgt_labels.append(sampled_gt_classes)
        tgt_bboxes.append(sampled_bbox)
        rois_with_gt.append(rois_per_image)
        sampled_max_overlaps.append(sampled_overlap)
        tgt_gt_inds.append(sampled_gt_ind)
        new_rois_num.append(paddle.shape(sampled_inds)[0])
    new_rois_num = paddle.concat(new_rois_num)
    return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num, sampled_max_overlaps
def sample_bbox(
        matches,
        match_labels,
        gt_classes,
        batch_size_per_im,
        fg_fraction,
        num_classes,
        use_random=True, ):
    """Convert match results into sampled RoI indices and class targets.

    Args:
        matches (Tensor): per-RoI index of the best-matching gt box.
        match_labels (Tensor): 1 = foreground, 0 = background, -1 = ignore.
        gt_classes (Tensor): gt class id per gt box.
        batch_size_per_im (int): number of RoIs to sample per image.
        fg_fraction (float): target fraction of foreground RoIs.
        num_classes (int): used as the reserved "background" class id.

    Returns:
        (sampled_inds, sampled_gt_classes)
    """
    gt_classes = paddle.gather(gt_classes, matches)
    # Background RoIs (match label 0) get the reserved class id num_classes.
    gt_classes = paddle.where(match_labels == 0,
                              paddle.ones_like(gt_classes) * num_classes,
                              gt_classes)
    # Ignored RoIs (match label -1) get -1 so subsample_labels skips them.
    gt_classes = paddle.where(match_labels == -1,
                              paddle.ones_like(gt_classes) * -1, gt_classes)
    rois_per_image = int(batch_size_per_im)
    fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
                                        num_classes, use_random)
    sampled_inds = paddle.concat([fg_inds, bg_inds])
    sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
    return sampled_inds, sampled_gt_classes
def _strip_pad(gt_polys):
new_gt_polys = []
for i in range(gt_polys.shape[0]):
gt_segs = []
for j in range(gt_polys[i].shape[0]):
new_poly = []
polys = gt_polys[i][j]
for ii in range(polys.shape[0]):
x, y = polys[ii]
if (x == -1 and y == -1):
continue
elif (x >= 0 or y >= 0):
new_poly.extend([x, y]) # array, one poly
if len(new_poly) > 6:
gt_segs.append(np.array(new_poly).astype('float64'))
new_gt_polys.append(gt_segs)
return new_gt_polys
def polygons_to_mask(polygons, height, width):
    """Rasterize COCO-style polygons into one binary mask via pycocotools.

    Args:
        polygons (list[ndarray]): each array has shape (Nx2,)
        height, width (int)
    Returns:
        ndarray: a bool mask of shape (height, width)
    """
    import pycocotools.mask as mask_util
    assert len(polygons) > 0, "COCOAPI does not support empty polygons"
    rles = mask_util.frPyObjects(polygons, height, width)
    rle = mask_util.merge(rles)
    # Bug fix: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `bool` is the documented replacement.
    return mask_util.decode(rle).astype(bool)
def rasterize_polygons_within_box(poly, box, resolution):
    """Crop polygons to `box` and rasterize them on a square grid.

    The polygons are shifted into box-relative coordinates, rescaled to
    resolution x resolution, rasterized with the COCO API, and returned
    as an int32 paddle tensor of shape (resolution, resolution).
    """
    box_w = box[2] - box[0]
    box_h = box[3] - box[1]
    # Work on a copy: the shift/scale below mutate the polygon arrays.
    shifted = copy.deepcopy(poly)
    for seg in shifted:
        seg[0::2] = seg[0::2] - box[0]
        seg[1::2] = seg[1::2] - box[1]
    # Clamp tiny boxes so the scale factor stays finite.
    scale_y = resolution / max(box_h, 0.1)
    scale_x = resolution / max(box_w, 0.1)
    if scale_y == scale_x:
        for seg in shifted:
            seg *= scale_y
    else:
        for seg in shifted:
            seg[0::2] *= scale_x
            seg[1::2] *= scale_y
    # 3. Rasterize the polygons with coco api
    mask = polygons_to_mask(shifted, resolution, resolution)
    return paddle.to_tensor(mask, dtype='int32')
def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
                         num_classes, resolution):
    """Build mask-head training targets from sampled RoIs.

    For each image: select foreground RoIs, gather their matched gt
    polygons, and rasterize each polygon set inside its RoI to a
    resolution x resolution mask.

    Args:
        gt_segms (list[Tensor]): padded gt polygons per image.
        rois (list[Tensor]): sampled RoIs per image.
        labels_int32 (list[Tensor]): sampled class labels per image
            (num_classes marks background, -1 marks ignore).
        sampled_gt_inds (list[Tensor]): matched gt index per sampled RoI.

    Returns:
        (mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index,
         tgt_weights) -- tgt_weights is zero for dummy RoIs inserted when
        an image has no foreground.
    """
    mask_rois = []
    mask_rois_num = []
    tgt_masks = []
    tgt_classes = []
    mask_index = []
    tgt_weights = []
    for k in range(len(rois)):
        has_fg = True
        rois_per_im = rois[k]
        gt_segms_per_im = gt_segms[k]
        labels_per_im = labels_int32[k]
        # Foreground = neither ignore (-1) nor background (num_classes).
        fg_inds = paddle.nonzero(
            paddle.logical_and(labels_per_im != -1, labels_per_im !=
                               num_classes))
        if fg_inds.numel() == 0:
            # No foreground in this image: keep one dummy RoI so the
            # gathers below stay non-empty; its loss weight is zeroed.
            has_fg = False
            fg_inds = paddle.ones([1], dtype='int32')
        inds_per_im = sampled_gt_inds[k]
        inds_per_im = paddle.gather(inds_per_im, fg_inds)
        gt_segms_per_im = paddle.gather(gt_segms_per_im, inds_per_im)
        fg_rois = paddle.gather(rois_per_im, fg_inds)
        fg_classes = paddle.gather(labels_per_im, fg_inds)
        fg_segms = paddle.gather(gt_segms_per_im, fg_inds)
        weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
        if not has_fg:
            # Zero out the loss contribution of the dummy RoI.
            weight = weight - 1
        # remove padding
        gt_polys = fg_segms.numpy()
        boxes = fg_rois.numpy()
        new_gt_polys = _strip_pad(gt_polys)
        # Rasterize each polygon set inside its RoI (numpy side).
        results = [
            rasterize_polygons_within_box(poly, box, resolution)
            for poly, box in zip(new_gt_polys, boxes)
        ]
        tgt_mask = paddle.stack(results)
        tgt_mask.stop_gradient = True
        fg_rois.stop_gradient = True
        mask_index.append(fg_inds)
        mask_rois.append(fg_rois)
        mask_rois_num.append(paddle.shape(fg_rois)[0])
        tgt_classes.append(fg_classes)
        tgt_masks.append(tgt_mask)
        tgt_weights.append(weight)
    mask_index = paddle.concat(mask_index)
    mask_rois_num = paddle.concat(mask_rois_num)
    tgt_classes = paddle.concat(tgt_classes, axis=0)
    tgt_masks = paddle.concat(tgt_masks, axis=0)
    tgt_weights = paddle.concat(tgt_weights, axis=0)
    return mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from ppdet.core.workspace import register, serializable
from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target
@register
@serializable
class RPNTargetAssign(object):
    """Assign classification/regression targets to RPN anchors.

    Wraps `rpn_anchor_target` with the sampling configuration and also
    returns the loss normalizer (sampled anchors per batch).
    """

    def __init__(self,
                 batch_size_per_im=256,
                 fg_fraction=0.5,
                 positive_overlap=0.7,
                 negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.positive_overlap = positive_overlap
        self.negative_overlap = negative_overlap
        self.use_random = use_random

    def __call__(self, inputs, anchors):
        """
        inputs: ground-truth instances.
        anchor_box (Tensor): [num_anchors, 4], num_anchors are all anchors in all feature maps.
        """
        gt_boxes = inputs['gt_bbox']
        batch_size = gt_boxes.shape[0]
        # Normalizer for the RPN losses: total sampled anchors per batch.
        norm = self.batch_size_per_im * batch_size
        targets = rpn_anchor_target(
            anchors, gt_boxes, self.batch_size_per_im, self.positive_overlap,
            self.negative_overlap, self.fg_fraction, self.use_random,
            batch_size)
        tgt_labels, tgt_bboxes, tgt_deltas = targets
        return tgt_labels, tgt_bboxes, tgt_deltas, norm
@register
class BBoxAssigner(object):
    """Sample RoIs and build bbox-head targets for (cascade) R-CNN.

    `fg_thresh` / `bg_thresh` are per-stage lists indexed by `stage`.
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=[.5, ],
                 bg_thresh=[.5, ],
                 use_random=True,
                 is_cls_agnostic=False,
                 num_classes=80):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.use_random = use_random
        self.is_cls_agnostic = is_cls_agnostic
        self.num_classes = num_classes

    def __call__(self,
                 rpn_rois,
                 rpn_rois_num,
                 inputs,
                 stage=0,
                 max_overlap=None):
        # Stages after the first behave as cascade refinement stages.
        is_cascade = stage > 0
        gt_classes = inputs['gt_class']
        gt_boxes = inputs['gt_bbox']
        # outs = (rois, tgt_labels, tgt_bboxes, tgt_gt_inds,
        #         new_rois_num, sampled_max_overlaps)
        outs = generate_proposal_target(
            rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im,
            self.fg_fraction, self.fg_thresh[stage], self.bg_thresh[stage],
            self.num_classes, self.use_random, is_cascade, max_overlap)
        rois = outs[0]
        rois_num = outs[-2]
        max_overlaps = outs[-1]
        # targets = (tgt_labels, tgt_bboxes, tgt_gt_inds)
        targets = outs[1:4]
        return rois, rois_num, max_overlaps, targets
@register
@serializable
class MaskAssigner(object):
    """Build mask-head targets from the bbox-head sampling results."""
    __shared__ = ['num_classes', 'mask_resolution']

    def __init__(self, num_classes=80, mask_resolution=14):
        super(MaskAssigner, self).__init__()
        self.num_classes = num_classes
        self.mask_resolution = mask_resolution

    def __call__(self, rois, tgt_labels, tgt_gt_inds, inputs):
        # Returns: mask_rois, mask_rois_num, tgt_classes, tgt_masks,
        #          mask_index, tgt_weights
        return generate_mask_target(inputs['gt_poly'], rois, tgt_labels,
                                    tgt_gt_inds, self.num_classes,
                                    self.mask_resolution)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
class ShapeSpec(
        namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
    """
    A lightweight, immutable record of what is known about a tensor's shape.

    Used as auxiliary model input/output metadata to make up for the lack
    of shape inference between paddle modules. Any field left unspecified
    is None, meaning "unknown".

    Attributes:
        channels:
        height:
        width:
        stride:
    """

    def __new__(cls, *, channels=None, height=None, width=None, stride=None):
        # Keyword-only construction; every field defaults to None.
        return super(ShapeSpec, cls).__new__(cls, channels, height, width,
                                             stride)
from .bbox import *
from .mask import *
from .target import *
from .post_process import *
import numpy as np
from numba import jit
@jit
def bbox2delta(bboxes1, bboxes2, weights):
    """Encode the offsets from source boxes to target boxes.

    Boxes are [x1, y1, x2, y2] with the inclusive-pixel (+1) convention.
    Returns an (N, 4) array of [dx, dy, dw, dh], each divided by the
    corresponding entry of `weights`.
    """
    src_w = bboxes1[:, 2] - bboxes1[:, 0] + 1
    src_h = bboxes1[:, 3] - bboxes1[:, 1] + 1
    src_cx = bboxes1[:, 0] + 0.5 * src_w
    src_cy = bboxes1[:, 1] + 0.5 * src_h
    tgt_w = bboxes2[:, 2] - bboxes2[:, 0] + 1
    tgt_h = bboxes2[:, 3] - bboxes2[:, 1] + 1
    tgt_cx = bboxes2[:, 0] + 0.5 * tgt_w
    tgt_cy = bboxes2[:, 1] + 0.5 * tgt_h
    dx = (tgt_cx - src_cx) / src_w / weights[0]
    dy = (tgt_cy - src_cy) / src_h / weights[1]
    dw = (np.log(tgt_w / src_w)) / weights[2]
    dh = (np.log(tgt_h / src_h)) / weights[3]
    return np.vstack([dx, dy, dw, dh]).transpose()
@jit
def delta2bbox(deltas, boxes, weights, bbox_clip=4.13):
    """Decode regression deltas back into boxes.

    Inverse of bbox2delta: each delta is multiplied by its weight, applied
    to the reference box center/size, and the result is converted back to
    [x1, y1, x2, y2]. `deltas` may hold one 4-tuple per class (stride-4
    column layout).
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
    boxes = boxes.astype(deltas.dtype, copy=False)
    ref_w = boxes[:, 2] - boxes[:, 0] + 1.0
    ref_h = boxes[:, 3] - boxes[:, 1] + 1.0
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h
    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] * wx
    dy = deltas[:, 1::4] * wy
    # Clamp the log-scale terms so np.exp cannot overflow.
    dw = np.minimum(deltas[:, 2::4] * ww, bbox_clip)
    dh = np.minimum(deltas[:, 3::4] * wh, bbox_clip)
    out_cx = dx * ref_w[:, np.newaxis] + ref_cx[:, np.newaxis]
    out_cy = dy * ref_h[:, np.newaxis] + ref_cy[:, np.newaxis]
    out_w = np.exp(dw) * ref_w[:, np.newaxis]
    out_h = np.exp(dh) * ref_h[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = out_cx - 0.5 * out_w
    pred_boxes[:, 1::4] = out_cy - 0.5 * out_h
    # The trailing "- 1" matches the inclusive-pixel (+1 width) convention
    # used by bbox2delta; the asymmetry with x1/y1 is intentional.
    pred_boxes[:, 2::4] = out_cx + 0.5 * out_w - 1
    pred_boxes[:, 3::4] = out_cy + 0.5 * out_h - 1
    return pred_boxes
@jit
def expand_bbox(bboxes, scale):
    """Scale boxes about their centers by `scale`; returns a new float32 array."""
    half_w = (bboxes[:, 2] - bboxes[:, 0]) * .5 * scale
    half_h = (bboxes[:, 3] - bboxes[:, 1]) * .5 * scale
    cx = (bboxes[:, 2] + bboxes[:, 0]) * .5
    cy = (bboxes[:, 3] + bboxes[:, 1]) * .5
    expanded = np.zeros(bboxes.shape, dtype=np.float32)
    expanded[:, 0] = cx - half_w
    expanded[:, 1] = cy - half_h
    expanded[:, 2] = cx + half_w
    expanded[:, 3] = cy + half_h
    return expanded
@jit
def clip_bbox(boxes, im_shape):
    """Clamp boxes (in place) into the image; im_shape is (height, width).

    Coordinates are clipped to [0, dim - 1]; columns repeat every 4 to
    support per-class box layouts. Returns the mutated `boxes`.
    """
    assert boxes.shape[1] % 4 == 0, \
        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
            boxes.shape[1]
        )
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # (column offset, upper bound): x1, y1, x2, y2
    for col, bound in ((0, max_x), (1, max_y), (2, max_x), (3, max_y)):
        boxes[:, col::4] = np.clip(boxes[:, col::4], 0, bound)
    return boxes
@jit
def bbox_overlaps(bboxes1, bboxes2):
    """Pairwise IoU between two box sets (inclusive-pixel +1 convention).

    Returns an (N, K) matrix; degenerate boxes clamp to zero area.
    """
    areas1 = (np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0) *
              np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0))
    areas2 = (np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0) *
              np.maximum(bboxes2[:, 3] - bboxes2[:, 1] + 1, 0))
    x1a, y1a, x2a, y2a = np.split(bboxes1, 4, axis=1)
    x1b, y1b, x2b, y2b = np.split(bboxes2, 4, axis=1)
    # Broadcast (N, 1) against (1, K) to form all-pairs intersections.
    inter_h = np.maximum(
        np.minimum(y2a, np.transpose(y2b)) - np.maximum(y1a, np.transpose(y1b))
        + 1, 0.)
    inter_w = np.maximum(
        np.minimum(x2a, np.transpose(x2b)) - np.maximum(x1a, np.transpose(x1b))
        + 1, 0.)
    inter = inter_w * inter_h
    total = np.expand_dims(areas1, 1) + np.expand_dims(areas2, 0)
    # total - inter is the true union area.
    return inter / (total - inter)
@jit
def nms(dets, thresh):
    """Greedy non-maximum suppression.

    Args:
        dets (ndarray): rows are [score, x1, y1, x2, y2].
        thresh (float): IoU at or above which a lower-scored box is dropped.

    Returns:
        Indices (into `dets`) of the kept boxes, in ascending index order;
        an empty list when `dets` is empty.
    """
    if dets.shape[0] == 0:
        return []
    scores = dets[:, 0]
    x1 = dets[:, 1]
    y1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    ndets = dets.shape[0]
    # Bug fix: `np.int` was removed in NumPy 1.24; use an explicit dtype.
    suppressed = np.zeros((ndets), dtype=np.int32)
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Suppress every remaining box that overlaps this one too much.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1
    return np.where(suppressed == 0)[0]
def nms_with_decode(bboxes,
                    bbox_probs,
                    bbox_deltas,
                    im_info,
                    keep_top_k=100,
                    score_thresh=0.05,
                    nms_thresh=0.5,
                    class_nums=81,
                    bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]):
    """Decode per-class deltas, run per-class NMS and cap detections.

    Args:
        bboxes: proposal boxes in network-input coordinates.
        bbox_probs: per-class scores, one row per proposal.
        bbox_deltas: per-class regression deltas.
        im_info: per-image [height, width, scale].

    Returns:
        (new_bboxes_num, im_results) where each result row is
        [label, score, x1, y1, x2, y2] and new_bboxes_num holds the
        cumulative per-image counts starting at 0.
    """
    bboxes_num = [0, bboxes.shape[0]]
    bboxes_v = np.array(bboxes)
    bbox_probs_v = np.array(bbox_probs)
    bbox_deltas_v = np.array(bbox_deltas)
    variance_v = np.array(bbox_reg_weights)
    im_results = [[] for _ in range(len(bboxes_num) - 1)]
    new_bboxes_num = [0]
    for i in range(len(bboxes_num) - 1):
        start = bboxes_num[i]
        end = bboxes_num[i + 1]
        if start == end:
            continue
        bbox_deltas_n = bbox_deltas_v[start:end, :]  # box delta
        rois_n = bboxes_v[start:end, :]  # box
        rois_n = rois_n / im_info[i][2]  # back to original-image scale
        rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v)
        rois_n = clip_bbox(rois_n, np.round(im_info[i][:2] / im_info[i][2]))
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = bbox_probs_v[start:end, :]
        # Class 0 is background and is skipped.
        for j in range(1, class_nums):
            inds = np.where(scores_n[:, j] > score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            keep = nms(dets_j, nms_thresh)
            nms_dets = dets_j[keep, :]
            # prepend the class label -> [label, score, x1, y1, x2, y2]
            label = np.array([j for _ in range(len(keep))])
            nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype(
                np.float32, copy=False)
            cls_boxes[j] = nms_dets
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(1, class_nums)])
        if len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
        im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
        im_results[i] = im_results_n
        new_bboxes_num.append(len(im_results_n) + new_bboxes_num[-1])
        # (removed dead per-image labels/scores/boxes unpacking -- the
        # slices were computed but never used)
    im_results = np.vstack([im_results[k] for k in range(len(bboxes_num) - 1)])
    new_bboxes_num = np.array(new_bboxes_num)
    return new_bboxes_num, im_results
@jit
def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights):
    """Encode regression targets and prepend the class-label column.

    Returns a float32 (N, 5) array of [label, dx, dy, dw, dh].
    """
    assert bboxes1.shape[0] == bboxes2.shape[0]
    assert bboxes1.shape[1] == 4
    assert bboxes2.shape[1] == 4
    bbox_reg_weights = np.asarray(bbox_reg_weights)
    # The old `targets = np.zeros(...)` pre-allocation was dead code:
    # bbox2delta builds and returns a fresh array.
    targets = bbox2delta(
        bboxes1=bboxes1, bboxes2=bboxes2, weights=bbox_reg_weights)
    return np.hstack([labels[:, np.newaxis], targets]).astype(
        np.float32, copy=False)
#@jit
def expand_bbox_targets(bbox_targets_input,
                        class_nums=81,
                        is_cls_agnostic=False):
    """Scatter per-RoI 4-dim targets into class-specific column blocks.

    Input rows are [label, dx, dy, dw, dh]; foreground rows (label > 0)
    have their deltas written into the 4-column block of their class.
    Returns (bbox_targets, bbox_inside_weights), both (N, 4 * class_nums).
    """
    class_labels = bbox_targets_input[:, 0]
    fg_inds = np.where(class_labels > 0)[0]
    if is_cls_agnostic:
        # Agnostic mode keeps only background + one generic fg class.
        class_nums = 2
    num_rois = class_labels.shape[0]
    bbox_targets = np.zeros((num_rois, 4 * class_nums))
    bbox_inside_weights = np.zeros(bbox_targets.shape)
    for ind in fg_inds:
        cls = 1 if is_cls_agnostic else int(class_labels[ind])
        offset = 4 * cls
        bbox_targets[ind, offset:offset + 4] = bbox_targets_input[ind, 1:]
        bbox_inside_weights[ind, offset:offset + 4] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
import six
import math
import numpy as np
from numba import jit
@jit
def decode(cnts, m):
    """Expand the first `m` RLE run lengths into a flat 0/1 list.

    Runs alternate starting from 0: cnts[0] zeros, cnts[1] ones, ...
    """
    bits = []
    bit = 0
    for j in range(m):
        bits.extend([bit] * cnts[j])
        bit = 1 - bit
    return bits
#@jit
def poly2mask(xy, k, h, w):
    """Rasterize one polygon into an (h, w) integer mask.

    Port of the pycocotools C rasterizer: the polygon is upsampled by 5x,
    its edges are traced pixel by pixel, crossings are converted to
    column-major run lengths, and the runs are decoded into a mask.

    Args:
        xy: flat [x0, y0, x1, y1, ...] coordinates.
        k: number of vertices (len(xy) // 2).
        h, w: output mask height and width.
    """
    scale = 5.
    # Upsample and close the polygon.
    x = [int(scale * p + 0.5) for p in xy[::2]]
    x = x + [x[0]]
    y = [int(scale * p + 0.5) for p in xy[1::2]]
    y = y + [y[0]]
    # (removed dead `m = sum(...)` edge-length upper bound -- it was
    # overwritten before first use)
    u, v = [], []
    # Trace each edge with a DDA-style walk along its major axis.
    for j in range(k):
        xs = x[j]
        xe = x[j + 1]
        ys = y[j]
        ye = y[j + 1]
        dx = abs(xe - xs)
        dy = abs(ys - ye)
        flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye)
        if flip:
            xs, xe = xe, xs
            ys, ye = ye, ys
        if dx >= dy:
            if (dx == 0):
                assert ye - ys == 0
            s = 0 if dx == 0 else float(ye - ys) / dx
        else:
            if (dy == 0):
                assert xe - xs == 0
            s = 0 if dy == 0 else float(xe - xs) / dy
        if dx >= dy:
            ts = [dx - d if flip else d for d in range(dx + 1)]
            u.extend([xs + t for t in ts])
            v.extend([int(ys + s * t + .5) for t in ts])
        else:
            ts = [dy - d if flip else d for d in range(dy + 1)]
            v.extend([t + ys for t in ts])
            u.extend([int(xs + s * t + .5) for t in ts])
    k = len(u)
    # Bug fix: `np.int` was removed in NumPy 1.24; use an explicit dtype
    # (int64 matches the former platform-int alias).
    x = np.zeros((k), np.int64)
    y = np.zeros((k), np.int64)
    m = 0
    # Downsample the traced boundary back to mask resolution, keeping only
    # vertical crossings that land on integer columns inside the mask.
    # (six.moves.xrange replaced by the builtin range -- Python 3 only.)
    for j in range(1, k):
        if u[j] != u[j - 1]:
            xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1))
            xd = (xd + .5) / scale - .5
            if (math.floor(xd) != xd or xd < 0 or xd > (w - 1)):
                continue
            yd = float(v[j] if v[j] < v[j - 1] else v[j - 1])
            yd = (yd + .5) / scale - .5
            yd = math.ceil(0 if yd < 0 else (h if yd > h else yd))
            x[m] = int(xd)
            y[m] = int(yd)
            m += 1
    k = m
    # Column-major linear positions of the crossings, then differences
    # give run lengths.
    a = [int(x[i] * h + y[i]) for i in range(k)]
    a.append(h * w)
    a.sort()
    b = [0] + a[:len(a) - 1]
    a = [c - d for (c, d) in zip(a, b)]
    k += 1
    # Merge zero-length runs into their predecessor.
    b = [0 for i in range(k)]
    b[0] = a[0]
    m, j = 1, 1
    while (j < k):
        if a[j] > 0:
            b[m] = a[j]
            m += 1
            j += 1
        else:
            j += 1
            if (j < k):
                b[m - 1] += a[j]
                j += 1
    mask = decode(b, m)
    # Bug fix: np.int -> explicit dtype here as well.
    mask = np.array(mask, dtype=np.int64).reshape((w, h))
    mask = mask.transpose((1, 0))
    return mask
def polys_to_boxes(polys):
    """Convert a list of polygons into an array of tight bounding boxes.

    `polys[j]` is the polygon group of object j; each group element is a
    collection of flat [x0, y0, x1, y1, ...] coordinate sequences.
    """
    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for idx, obj_polys in enumerate(polys):
        # NOTE: mins start huge but maxes start at 0, mirroring the
        # image-coordinate assumption that all points are non-negative.
        x_min, y_min = 10000000, 10000000
        x_max, y_max = 0, 0
        for part in obj_polys:
            x_min = min(x_min, min(min(p[::2]) for p in part))
            y_min = min(y_min, min(min(p[1::2]) for p in part))
            x_max = max(x_max, max(max(p[::2]) for p in part))
            y_max = max(y_max, max(max(p[1::2]) for p in part))
        boxes[idx, :] = [x_min, y_min, x_max, y_max]
    return boxes
@jit
def bbox_overlaps_mask(boxes, query_boxes):
    """IoU between every box and every query box (inclusive +1 convention).

    Returns an (N, K) matrix; pairs with no overlap stay 0.
    """
    n_boxes = boxes.shape[0]
    n_query = query_boxes.shape[0]
    overlaps = np.zeros((n_boxes, n_query), dtype=boxes.dtype)
    for q in range(n_query):
        q_area = ((query_boxes[q, 2] - query_boxes[q, 0] + 1) *
                  (query_boxes[q, 3] - query_boxes[q, 1] + 1))
        for b in range(n_boxes):
            iw = (min(boxes[b, 2], query_boxes[q, 2]) -
                  max(boxes[b, 0], query_boxes[q, 0]) + 1)
            if iw <= 0:
                continue
            ih = (min(boxes[b, 3], query_boxes[q, 3]) -
                  max(boxes[b, 1], query_boxes[q, 1]) + 1)
            if ih <= 0:
                continue
            b_area = ((boxes[b, 2] - boxes[b, 0] + 1) *
                      (boxes[b, 3] - boxes[b, 1] + 1))
            union = float(b_area + q_area - iw * ih)
            overlaps[b, q] = iw * ih / union
    return overlaps
@jit
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    # Clamp degenerate boxes to 1 pixel so the scale below stays finite.
    w = np.maximum(box[2] - box[0], 1)
    h = np.maximum(box[3] - box[1], 1)
    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p = p.reshape(-1)
        # Shift into box coordinates and rescale to the M x M grid.
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)
    mask = []
    # Renamed loop variable: the old `for polygons in polygons_norm`
    # shadowed the `polygons` parameter. Also dropped the dead `i = 0`.
    for p_norm in polygons_norm:
        assert p_norm.shape[0] % 2 == 0, p_norm.shape
        k = p_norm.shape[0] // 2
        one_msk = poly2mask(p_norm, k, M, M)
        mask.append(one_msk)
    mask = np.array(mask)
    # Union of all polygon parts -> single binary mask.
    mask = np.sum(mask, axis=0)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
#@jit
def expand_mask_targets(masks, mask_class_labels, resolution, num_classes):
    """Expand masks from shape (#masks, resolution ** 2)
    to (#masks, #classes * resolution ** 2) to encode class
    specific mask targets.
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    area = resolution**2
    # -1 marks "don't care" cells that the loss should ignore.
    mask_targets = -np.ones(
        (masks.shape[0], num_classes * area), dtype=np.int32)
    for idx in range(masks.shape[0]):
        cls = int(mask_class_labels[idx])
        # Background rows (cls == 0) stay all -1: this only happens when an
        # image contributed no foreground samples.
        if cls > 0:
            start = area * cls
            mask_targets[idx, start:start + area] = masks[idx, :]
    return mask_targets
import six
import os
import numpy as np
from numba import jit
from .bbox import delta2bbox, clip_bbox, expand_bbox, nms
import pycocotools.mask as mask_util
import cv2
def bbox_post_process(bboxes,
                      bbox_prob,
                      bbox_deltas,
                      im_shape,
                      scale_factor,
                      keep_top_k=100,
                      score_thresh=0.05,
                      nms_thresh=0.5,
                      class_nums=81,
                      bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                      with_background=True):
    """Decode, clip and NMS bbox-head outputs for a batch of images.

    Args:
        bboxes: tuple (bbox, bbox_num) -- flat proposal array plus per-image
            counts.
        bbox_prob: per-class scores, one row per proposal.
        bbox_deltas: per-class regression deltas, reshaped per image.
        im_shape / scale_factor: per-image input shape and resize factor.
        with_background (bool): when True, class 0 is background and the
            per-class loop starts at 1 (bool is used directly as the
            range start).

    Returns:
        (new_bbox, new_bbox_num): rows are [label, score, x1, y1, x2, y2],
        counts are int32 per image.
    """
    bbox, bbox_num = bboxes
    new_bbox = [[] for _ in range(len(bbox_num))]
    new_bbox_num = []
    st_num = 0
    end_num = 0
    for i in range(len(bbox_num)):
        box_num = bbox_num[i]
        end_num += box_num
        boxes = bbox[st_num:end_num, :]  # bbox
        boxes = boxes / scale_factor[i]  # scale back to original image size
        bbox_delta = bbox_deltas[st_num:end_num, :, :]  # bbox delta
        bbox_delta = np.reshape(bbox_delta, (box_num, -1))
        # step1: decode
        boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights)
        # step2: clip
        boxes = clip_bbox(boxes, im_shape[i][:2] / scale_factor[i])
        # step3: nms
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = bbox_prob[st_num:end_num, :]
        for j in range(with_background, class_nums):
            inds = np.where(scores_n[:, j] > score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = boxes[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            keep = nms(dets_j, nms_thresh)
            nms_dets = dets_j[keep, :]
            # add labels -> rows become [label, score, x1, y1, x2, y2]
            label = np.array([j for _ in range(len(keep))])
            nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype(
                np.float32, copy=False)
            cls_boxes[j] = nms_dets
        st_num += box_num
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(with_background, class_nums)])
        if len(image_scores) > keep_top_k:
            # Keep everything at or above the keep_top_k-th score.
            image_thresh = np.sort(image_scores)[-keep_top_k]
            for j in range(with_background, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
        new_bbox_n = np.vstack(
            [cls_boxes[j] for j in range(with_background, class_nums)])
        new_bbox[i] = new_bbox_n
        new_bbox_num.append(len(new_bbox_n))
    new_bbox = np.vstack([new_bbox[k] for k in range(len(bbox_num))])
    new_bbox_num = np.array(new_bbox_num).astype('int32')
    return new_bbox, new_bbox_num
@jit
def mask_post_process(det_res,
                      im_shape,
                      scale_factor,
                      resolution=14,
                      binary_thresh=0.5):
    """Paste predicted mask logits back onto the original image and RLE-encode.

    Args:
        det_res (dict): holds 'bbox' ([label, score, x1, y1, x2, y2] rows),
            'bbox_num' (per-image counts) and 'mask' (per-class mask logits).
        im_shape / scale_factor: per-image network-input shape and resize
            factor (only scale_factor[i, 0] is used for both axes).
        resolution (int): side length of the predicted mask grid.
        binary_thresh (float): probability threshold for binarization.

    Returns:
        Object array whose rows are [rle, label, score]; returns the empty
        mask array unchanged when there are no detections.
    """
    bbox = det_res['bbox']
    bbox_num = det_res['bbox_num']
    masks = det_res['mask']
    if masks.shape[0] == 0:
        return masks
    M = resolution
    # One-pixel border is added around the mask before resizing, so the
    # reference boxes are expanded by the matching ratio.
    scale = (M + 2.0) / M
    boxes = bbox[:, 2:]
    labels = bbox[:, 0]
    segms_results = [[] for _ in range(len(bbox_num))]
    st_num = 0
    end_num = 0
    for i in range(len(bbox_num)):
        length = bbox_num[i]
        end_num += length
        cls_segms = []
        boxes_n = boxes[st_num:end_num]
        labels_n = labels[st_num:end_num]
        masks_n = masks[st_num:end_num]
        im_h = int(round(im_shape[i][0] / scale_factor[i, 0]))
        im_w = int(round(im_shape[i][1] / scale_factor[i, 0]))
        boxes_n = expand_bbox(boxes_n, scale)
        boxes_n = boxes_n.astype(np.int32)
        padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
        for j in range(len(boxes_n)):
            class_id = int(labels_n[j])
            padded_mask[1:-1, 1:-1] = masks_n[j, class_id, :, :]
            ref_box = boxes_n[j, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)
            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > binary_thresh, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
            # Intersect the (possibly out-of-image) ref box with the image.
            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)
            im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[
                1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])]
            # (removed the dead `sum` accumulator -- it shadowed the
            # builtin and its total was never read)
            rle = mask_util.encode(
                np.array(
                    im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms.append(rle)
        segms_results[i] = np.array(cls_segms)[:, np.newaxis]
        st_num += length
    segms_results = np.vstack([segms_results[k] for k in range(len(bbox_num))])
    # hstack with the RLE column yields an object array; the first three
    # columns are [rle, label, score].
    bboxes = np.hstack([segms_results, bbox])
    return bboxes[:, :3]
@jit
def get_det_res(bboxes, scores, labels, bbox_nums, image_id,
                label_to_cat_id_map):
    """Convert per-image detections into COCO-style result dicts.

    NOTE(review): this span contained a garbled merge of the old
    (packed-bbox) and the new (separate scores/labels) variants plus diff
    residue; it is reconstructed here against the new interface. The
    cursor `k` is advanced before the negative-label skip so it cannot
    desynchronize from the flat arrays.

    Args:
        bboxes: flat (N, 4) array of [xmin, ymin, xmax, ymax].
        scores / labels: flat per-detection score and class-label arrays.
        bbox_nums: number of detections per image.
        image_id: per-image id, indexed as image_id[i][0].
        label_to_cat_id_map (dict): model label -> COCO category id.

    Returns:
        list[dict]: COCO detection results with xywh boxes.
    """
    det_res = []
    k = 0
    for i in range(len(bbox_nums)):
        cur_image_id = int(image_id[i][0])
        det_nums = bbox_nums[i]
        for j in range(det_nums):
            box = bboxes[k]
            score = float(scores[k])
            label = int(labels[k])
            k = k + 1
            if label < 0:
                # Padding/background entry: consumed but not reported.
                continue
            xmin, ymin, xmax, ymax = box.tolist()
            category_id = label_to_cat_id_map[label]
            w = xmax - xmin
            h = ymax - ymin
            bbox = [xmin, ymin, w, h]
            dt_res = {
                'image_id': cur_image_id,
                'category_id': category_id,
                'bbox': bbox,
                'score': score
            }
            det_res.append(dt_res)
    return det_res
@jit
def get_seg_res(masks, scores, labels, mask_nums, image_id,
                label_to_cat_id_map):
    """Convert per-image binary masks into COCO-style RLE result dicts.

    NOTE(review): this span contained a garbled merge of the old
    (pre-encoded RLE) and the new (raw-mask) variants plus diff residue;
    it is reconstructed here against the new interface, including the
    trailing `return` that the diff elision swallowed.

    Args:
        masks: flat array of (H, W) binary masks.
        scores / labels: flat per-detection score and class-label arrays.
        mask_nums: number of masks per image.
        image_id: per-image id, indexed as image_id[i][0].
        label_to_cat_id_map (dict): model label -> COCO category id.

    Returns:
        list[dict]: COCO segmentation results with RLE-encoded masks.
    """
    import pycocotools.mask as mask_util
    seg_res = []
    k = 0
    for i in range(len(mask_nums)):
        cur_image_id = int(image_id[i][0])
        det_nums = mask_nums[i]
        for j in range(det_nums):
            mask = masks[k]
            score = float(scores[k])
            label = int(labels[k])
            k = k + 1
            cat_id = label_to_cat_id_map[label]
            rle = mask_util.encode(
                np.array(
                    mask[:, :, None], order="F", dtype="uint8"))[0]
            if six.PY3:
                # JSON cannot carry bytes; decode the RLE counts string.
                if 'counts' in rle:
                    rle['counts'] = rle['counts'].decode("utf8")
            sg_res = {
                'image_id': cur_image_id,
                'category_id': cat_id,
                'segmentation': rle,
                'score': score
            }
            seg_res.append(sg_res)
    return seg_res
import six
import math
import numpy as np
from numba import jit
from .bbox import *
from .mask import *
@jit
def generate_rpn_anchor_target(anchors,
                               gt_boxes,
                               is_crowd,
                               im_info,
                               rpn_straddle_thresh,
                               rpn_batch_size_per_im,
                               rpn_positive_overlap,
                               rpn_negative_overlap,
                               rpn_fg_fraction,
                               use_random=True,
                               anchor_reg_weights=[1., 1., 1., 1.]):
    """Build RPN classification and regression targets for a batch.

    Per image: optionally drop anchors straddling the image border, match
    the remaining anchors to (non-crowd) gt boxes, subsample fg/bg, and
    encode regression deltas. Indices are offset by i * anchor_num so the
    outputs address a flattened (batch * anchors) tensor.

    Returns:
        (loc_indexes, cls_indexes, tgt_labels, tgt_deltas,
         anchor_inside_weights)
    """
    anchor_num = anchors.shape[0]
    batch_size = gt_boxes.shape[0]
    loc_indexes = []
    cls_indexes = []
    tgt_labels = []
    tgt_deltas = []
    anchor_inside_weights = []
    for i in range(batch_size):
        # TODO: move anchor filter into anchor generator
        im_height = im_info[i][0]
        im_width = im_info[i][1]
        im_scale = im_info[i][2]
        if rpn_straddle_thresh >= 0:
            # Keep only anchors fully (within the tolerance) inside the image.
            anchor_inds = np.where((anchors[:, 0] >= -rpn_straddle_thresh) & (
                anchors[:, 1] >= -rpn_straddle_thresh) & (
                    anchors[:, 2] < im_width + rpn_straddle_thresh) & (
                        anchors[:, 3] < im_height + rpn_straddle_thresh))[0]
            anchor = anchors[anchor_inds, :]
        else:
            anchor_inds = np.arange(anchors.shape[0])
            anchor = anchors
        # gt boxes are stored at original-image scale; anchors live in
        # network-input coordinates.
        gt_bbox = gt_boxes[i] * im_scale
        is_crowd_slice = is_crowd[i]
        not_crowd_inds = np.where(is_crowd_slice == 0)[0]
        gt_bbox = gt_bbox[not_crowd_inds]
        # Step1: match anchor and gt_bbox
        anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels = label_anchor(anchor,
                                                                       gt_bbox)
        # Step2: sample anchor
        fg_inds, bg_inds, fg_fake_inds, fake_num = sample_anchor(
            anchor_gt_bbox_iou, labels, rpn_positive_overlap,
            rpn_negative_overlap, rpn_batch_size_per_im, rpn_fg_fraction,
            use_random)
        # Step3: make output
        # Fake fg entries pad the regression set; their inside weights are
        # zeroed below so they do not contribute to the loss.
        loc_inds = np.hstack([fg_fake_inds, fg_inds])
        cls_inds = np.hstack([fg_inds, bg_inds])
        sampled_labels = labels[cls_inds]
        sampled_anchors = anchor[loc_inds]
        sampled_gt_boxes = gt_bbox[anchor_gt_bbox_inds[loc_inds]]
        sampled_deltas = bbox2delta(sampled_anchors, sampled_gt_boxes,
                                    anchor_reg_weights)
        anchor_inside_weight = np.zeros((len(loc_inds), 4), dtype=np.float32)
        anchor_inside_weight[fake_num:, :] = 1
        # Offset into the flattened (batch * anchors) index space.
        loc_indexes.append(anchor_inds[loc_inds] + i * anchor_num)
        cls_indexes.append(anchor_inds[cls_inds] + i * anchor_num)
        tgt_labels.append(sampled_labels)
        tgt_deltas.append(sampled_deltas)
        anchor_inside_weights.append(anchor_inside_weight)
    loc_indexes = np.concatenate(loc_indexes)
    cls_indexes = np.concatenate(cls_indexes)
    tgt_labels = np.concatenate(tgt_labels).astype('float32')
    tgt_deltas = np.vstack(tgt_deltas).astype('float32')
    anchor_inside_weights = np.vstack(anchor_inside_weights)
    return loc_indexes, cls_indexes, tgt_labels, tgt_deltas, anchor_inside_weights
@jit
def label_anchor(anchors, gt_boxes):
    """Match anchors to gt boxes and pre-label best-match anchors.

    Returns:
        anchor_gt_bbox_inds: best gt index per anchor.
        anchor_gt_bbox_iou: IoU of that best match per anchor.
        labels: int32 per-anchor labels, 1 for anchors that achieve a gt's
            highest IoU, -1 (undecided) otherwise.
    """
    iou = bbox_overlaps(anchors, gt_boxes)
    # every gt's anchor's index
    gt_bbox_anchor_inds = iou.argmax(axis=0)
    gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])]
    # Broadcast compare: row indices of every (anchor, gt) cell that ties
    # some gt's best IoU -- these anchors become forced positives.
    gt_bbox_anchor_iou_inds = np.where(iou == gt_bbox_anchor_iou)[0]
    # every anchor's gt bbox's index
    anchor_gt_bbox_inds = iou.argmax(axis=1)
    anchor_gt_bbox_iou = iou[np.arange(iou.shape[0]), anchor_gt_bbox_inds]
    labels = np.ones((iou.shape[0], ), dtype=np.int32) * -1
    labels[gt_bbox_anchor_iou_inds] = 1
    return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels
@jit
def sample_anchor(anchor_gt_bbox_iou,
                  labels,
                  rpn_positive_overlap,
                  rpn_negative_overlap,
                  rpn_batch_size_per_im,
                  rpn_fg_fraction,
                  use_random=True):
    """Subsample foreground/background anchors for the RPN loss.

    NOTE: `labels` is modified in place; the caller observes the updated
    array. Returns (fg_inds, bg_inds, fg_fake_inds, fake_num) where
    fg_fake_inds repeats the first fg index once for every background
    anchor that was also a forced positive, and fake_num counts them.
    """
    labels[anchor_gt_bbox_iou >= rpn_positive_overlap] = 1
    num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
    fg_inds = np.where(labels == 1)[0]
    # Disable surplus foreground anchors beyond the fg quota.
    if len(fg_inds) > num_fg and use_random:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    else:
        disable_inds = fg_inds[num_fg:]
    labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]
    num_bg = rpn_batch_size_per_im - np.sum(labels == 1)
    bg_inds = np.where(anchor_gt_bbox_iou < rpn_negative_overlap)[0]
    # Background sampling (with replacement on the random path).
    if len(bg_inds) > num_bg and use_random:
        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds[:num_bg]
    fg_fake_inds = np.array([], np.int32)
    # NOTE(review): fg_inds[0] assumes at least one positive anchor
    # survives sampling -- confirm upstream guarantees this.
    fg_value = np.array([fg_inds[0]], np.int32)
    fake_num = 0
    for bg_id in enable_inds:
        if bg_id in fg_inds:
            # A forced-positive anchor was drawn as background: it will be
            # flipped to bg below, so pad the regression set with a fake fg.
            fake_num += 1
            fg_fake_inds = np.hstack([fg_fake_inds, fg_value])
    labels[enable_inds] = 0
    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]
    return fg_inds, bg_inds, fg_fake_inds, fake_num
@jit
def filter_roi(rois, max_overlap):
    """Drop degenerate rois and rois already perfectly matched.

    Keeps rois with positive width and height (legacy +1 box convention)
    whose previous-stage max overlap is below 1. If nothing survives, a
    single all-zero float32 placeholder box is returned so callers never
    receive an empty array.
    """
    widths = rois[:, 2] - rois[:, 0] + 1
    heights = rois[:, 3] - rois[:, 1] + 1
    valid = (widths > 0) & (heights > 0) & (max_overlap < 1)
    keep = np.flatnonzero(valid)
    if keep.size == 0:
        return np.zeros((1, 4)).astype('float32')
    return rois[keep, :]
@jit
def generate_proposal_target(rpn_rois,
                             rpn_rois_num,
                             gt_classes,
                             is_crowd,
                             gt_boxes,
                             im_info,
                             batch_size_per_im,
                             fg_fraction,
                             fg_thresh,
                             bg_thresh_hi,
                             bg_thresh_lo,
                             bbox_reg_weights,
                             class_nums=81,
                             use_random=True,
                             is_cls_agnostic=False,
                             is_cascade_rcnn=False,
                             max_overlaps=None):
    """Sample RPN proposals and build per-roi training targets for the
    RCNN bbox head.

    Proposals of all images arrive concatenated in `rpn_rois`, with
    `rpn_rois_num[i]` giving how many belong to image i. For each image the
    gt boxes are prepended to the rescaled proposals, every candidate is
    labeled against the gt via `label_bbox`, and a batch of foreground /
    background rois is drawn via `sample_bbox`.

    Args:
        rpn_rois (np.ndarray): (sum(rpn_rois_num), 4) concatenated proposals.
        rpn_rois_num (sequence): number of proposals per image.
        gt_classes, is_crowd, gt_boxes: per-image gt annotations.
        im_info: per-image [h, w, scale] (scale read from im_info[i][2]).
        batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
            bg_thresh_lo: sampling hyper-parameters passed to `sample_bbox`.
        bbox_reg_weights: regression weights passed to `compute_bbox_targets`.
        class_nums (int): number of classes (default 81, COCO + background).
        use_random (bool): randomize fg/bg subsampling.
        is_cls_agnostic (bool): class-agnostic bbox regression expansion.
        is_cascade_rcnn (bool): cascade mode; slices `max_overlaps` from the
            previous stage and filters rois via `filter_roi`.
        max_overlaps (np.ndarray|None): previous-stage per-roi max overlap;
            only consumed when `is_cascade_rcnn` is True.

    Returns:
        tuple: (rois, tgt_labels, tgt_deltas, rois_inside_weights,
        rois_outside_weights, new_rois_num, sampled_max_overlaps), each
        concatenated over images.
    """
    rois = []
    tgt_labels = []
    tgt_deltas = []
    rois_inside_weights = []
    rois_outside_weights = []
    sampled_max_overlaps = []
    new_rois_num = []
    st_num = 0
    end_num = 0
    for im_i in range(len(rpn_rois_num)):
        length = rpn_rois_num[im_i]
        end_num += length
        # Slice this image's proposals out of the flat tensor.
        rpn_roi = rpn_rois[st_num:end_num]
        max_overlap = max_overlaps[st_num:end_num] if is_cascade_rcnn else None
        im_scale = im_info[im_i][2]
        # Divide by im_scale before mixing with gt boxes (presumably gt
        # boxes are in original-image coordinates — confirm against caller).
        rpn_roi = rpn_roi / im_scale
        gt_bbox = gt_boxes[im_i]
        if is_cascade_rcnn:
            rpn_roi = filter_roi(rpn_roi, max_overlap)
        # Prepend gt boxes so they can be sampled as (perfect) foreground.
        bbox = np.vstack([gt_bbox, rpn_roi]).astype('float32')
        # Step1: label bbox
        roi_gt_bbox_inds, labels, max_overlap = label_bbox(
            bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i])
        # Step2: sample bbox
        fg_inds, bg_inds, fg_nums = sample_bbox(
            max_overlap, batch_size_per_im, fg_fraction, fg_thresh,
            bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
            use_random, is_cls_agnostic, is_cascade_rcnn)
        # Step3: make output
        # Foreground indices come first; everything after fg_nums is bg.
        sampled_inds = np.append(fg_inds, bg_inds)
        sampled_labels = labels[sampled_inds]
        sampled_labels[fg_nums:] = 0
        sampled_boxes = bbox[sampled_inds]
        sampled_max_overlap = max_overlap[sampled_inds]
        sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]]
        # Background rois get zeroed gt boxes and zero regression targets.
        sampled_gt_boxes[fg_nums:, :] = 0
        sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes,
                                              sampled_labels, bbox_reg_weights)
        sampled_deltas[fg_nums:, :] = 0
        sampled_deltas, bbox_inside_weights = expand_bbox_targets(
            sampled_deltas, class_nums, is_cls_agnostic)
        bbox_outside_weights = np.array(
            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
        # Map sampled boxes back to the network input scale.
        roi = sampled_boxes * im_scale
        st_num += length
        rois.append(roi)
        new_rois_num.append(roi.shape[0])
        tgt_labels.append(sampled_labels)
        tgt_deltas.append(sampled_deltas)
        rois_inside_weights.append(bbox_inside_weights)
        rois_outside_weights.append(bbox_outside_weights)
        sampled_max_overlaps.append(sampled_max_overlap)
    rois = np.concatenate(rois, axis=0).astype(np.float32)
    tgt_labels = np.concatenate(
        tgt_labels, axis=0).astype(np.int32).reshape(-1, 1)
    tgt_deltas = np.concatenate(tgt_deltas, axis=0).astype(np.float32)
    rois_inside_weights = np.concatenate(
        rois_inside_weights, axis=0).astype(np.float32)
    rois_outside_weights = np.concatenate(
        rois_outside_weights, axis=0).astype(np.float32)
    sampled_max_overlaps = np.concatenate(
        sampled_max_overlaps, axis=0).astype(np.float32)
    new_rois_num = np.asarray(new_rois_num, np.int32)
    return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num, sampled_max_overlaps
@jit
def label_bbox(boxes, gt_boxes, gt_classes, is_crowd, class_nums=81):
    """Label each candidate box with the class of its best-overlapping gt.

    Builds an (N, class_nums) per-class overlap table: for every box with a
    positive overlap, the overlap value is stored in the column of the
    matched gt's class. Rows flagged crowd are set to -1 so they can never
    be selected as foreground. The argmax over columns then yields the
    label (0, i.e. background, for rows with no positive entry).

    Returns:
        tuple: (matched_gt_inds, labels, max_overlap).
    """
    overlaps = bbox_overlaps(boxes, gt_boxes)
    num_boxes = boxes.shape[0]
    matched_gt_inds = np.zeros((num_boxes), dtype=np.int32)
    per_class_iou = np.zeros((num_boxes, class_nums), dtype=np.float32)
    best_gt = overlaps.argmax(axis=1)
    best_iou = overlaps.max(axis=1)
    # Boxes with any positive overlap are matched to their best gt box.
    matched = np.where(best_iou > 0)[0].astype('int32')
    matched_gt_inds[matched] = best_gt[matched]
    matched_classes = gt_classes[best_gt[matched]].astype('int32')
    # Record the overlap in the column of the matched gt's class.
    per_class_iou[matched, matched_classes] = best_iou[matched]
    # is_crowd flags the leading rows (the caller stacks gt boxes first);
    # -1 keeps crowd rows out of foreground selection.
    crowd_rows = np.where(is_crowd)[0]
    per_class_iou[crowd_rows] = -1
    max_overlap = per_class_iou.max(axis=1)
    labels = per_class_iou.argmax(axis=1)
    return matched_gt_inds, labels, max_overlap
@jit
def sample_bbox(max_overlap,
                batch_size_per_im,
                fg_fraction,
                fg_thresh,
                bg_thresh_hi,
                bg_thresh_lo,
                bbox_reg_weights,
                class_nums,
                use_random=True,
                is_cls_agnostic=False,
                is_cascade_rcnn=False):
    """Pick foreground/background roi indices for one image.

    Foreground rois have max_overlap >= fg_thresh; background rois fall in
    [bg_thresh_lo, bg_thresh_hi). In cascade mode every qualifying roi is
    kept; otherwise fg is capped at fg_fraction of the batch and bg fills
    the remainder, with optional random subsampling.

    Note: bbox_reg_weights, class_nums and is_cls_agnostic are accepted for
    interface compatibility but unused in this function.

    Returns:
        tuple: (fg_inds, bg_inds, fg_nums).
    """
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
    fg_inds = np.where(max_overlap >= fg_thresh)[0]
    bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
                                                       bg_thresh_lo))[0]
    if is_cascade_rcnn:
        # Cascade stages keep every qualifying roi without subsampling.
        fg_nums = fg_inds.shape[0]
        bg_nums = bg_inds.shape[0]
    else:
        # Cap foreground at the configured fraction of the batch.
        fg_nums = np.minimum(fg_rois_per_im, fg_inds.shape[0])
        if (fg_inds.shape[0] > fg_nums) and use_random:
            fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
        fg_inds = fg_inds[:fg_nums]
        # Fill the rest of the batch with background rois.
        bg_nums = np.minimum(rois_per_image - fg_nums, bg_inds.shape[0])
        if (bg_inds.shape[0] > bg_nums) and use_random:
            bg_inds = np.random.choice(bg_inds, size=bg_nums, replace=False)
        bg_inds = bg_inds[:bg_nums]
    return fg_inds, bg_inds, fg_nums
@jit
def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
                         rois_num, labels_int32, num_classes, resolution):
    """Build mask-head training targets from the sampled rcnn rois.

    Rois of all images arrive concatenated, with `rois_num[k]` giving how
    many belong to image k. For each image the padded gt polygons are
    stripped of their (-1, -1) padding points, the rois are rescaled by
    1/im_scale, and `sample_mask` produces a resolution x resolution target
    for every foreground roi.

    Args:
        im_info: per-image [h, w, scale] (scale read from im_info[k][2]).
        gt_classes, is_crowd: per-image gt class ids and crowd flags.
        gt_segms: per-image gt polygons, padded with (-1, -1) points.
        rois (np.ndarray): concatenated rois in network-input scale.
        rois_num (sequence): roi count per image.
        labels_int32 (np.ndarray): per-roi class labels (>0 = foreground).
        num_classes (int): number of classes.
        resolution (int): output mask side length.

    Returns:
        tuple: (mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32),
        each concatenated over images.
    """
    mask_rois = []
    mask_rois_num = []
    rois_has_mask_int32 = []
    mask_int32 = []
    st_num = 0
    end_num = 0
    for k in range(len(rois_num)):
        length = rois_num[k]
        end_num += length
        # remove padding
        gt_polys = gt_segms[k]
        new_gt_polys = []
        for i in range(gt_polys.shape[0]):
            gt_segs = []
            for j in range(gt_polys[i].shape[0]):
                new_poly = []
                polys = gt_polys[i][j]
                for ii in range(polys.shape[0]):
                    x, y = polys[ii]
                    # (-1, -1) marks a padded point; real points are >= 0.
                    if (x == -1 and y == -1):
                        continue
                    elif (x >= 0 or y >= 0):
                        new_poly.append([x, y])  # array, one poly
                if len(new_poly) > 0:
                    gt_segs.append(new_poly)
            new_gt_polys.append(gt_segs)
        im_scale = im_info[k][2]
        # Rescale rois by 1/im_scale to match the gt polygons' coordinates
        # (presumably original-image scale — confirm against the pipeline).
        boxes = rois[st_num:end_num] / im_scale
        bbox_fg, bbox_has_mask, masks = sample_mask(
            boxes, new_gt_polys, labels_int32[st_num:end_num], gt_classes[k],
            is_crowd[k], num_classes, resolution)
        st_num += length
        # Scale the selected boxes back to network-input coordinates.
        mask_rois.append(bbox_fg * im_scale)
        mask_rois_num.append(len(bbox_fg))
        rois_has_mask_int32.append(bbox_has_mask)
        mask_int32.append(masks)
    mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32)
    mask_rois_num = np.array(mask_rois_num).astype(np.int32)
    rois_has_mask_int32 = np.concatenate(
        rois_has_mask_int32, axis=0).astype(np.int32)
    mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32)
    return mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32
@jit
def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes,
                resolution):
    """Rasterize per-roi mask targets for one image.

    Each foreground roi (label > 0) is matched to the non-crowd gt polygon
    whose bounding box it overlaps most, and that polygon is rasterized
    into a binary resolution x resolution grid. If no roi is foreground, a
    single background roi with an all -1 mask is emitted so downstream
    concatenation never sees an empty array (-1 presumably marks entries
    ignored by the loss — confirm).

    Args:
        boxes (np.ndarray): (N, 4) rois, same coordinate scale as gt_polys.
        gt_polys (list): per-gt list of polygons (lists of [x, y] points).
        label_int32 (np.ndarray): per-roi class label (>0 = foreground).
        gt_classes, is_crowd: per-gt class ids and crowd flags.
        num_classes (int): number of classes.
        resolution (int): output mask side length.

    Returns:
        tuple: (bbox_fg, bbox_has_mask, masks) where masks is expanded to
        per-class targets by `expand_mask_targets`.
    """
    # Only non-crowd, positive-class gt polygons are valid mask sources.
    gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
    _gt_polys = [gt_polys[i] for i in gt_polys_inds]
    boxes_from_polys = polys_to_boxes(_gt_polys)
    fg_inds = np.where(label_int32 > 0)[0]
    bbox_has_mask = fg_inds.copy()
    if fg_inds.shape[0] > 0:
        labels_fg = label_int32[fg_inds]
        masks_fg = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32)
        bbox_fg = boxes[fg_inds]
        # Match every fg roi to its best-overlapping gt polygon box.
        iou = bbox_overlaps_mask(bbox_fg, boxes_from_polys)
        fg_polys_inds = np.argmax(iou, axis=1)
        for i in range(bbox_fg.shape[0]):
            poly_gt = _gt_polys[fg_polys_inds[i]]
            roi_fg = bbox_fg[i]
            # Rasterize the polygon inside the roi, then binarize it.
            mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution)
            mask = np.array(mask > 0, dtype=np.int32)
            masks_fg[i, :] = np.reshape(mask, resolution**2)
    else:
        # No foreground roi: emit one background roi with an all -1 mask
        # so the batch is never empty.
        bg_inds = np.where(label_int32 == 0)[0]
        bbox_fg = boxes[bg_inds[0]].reshape((1, -1))
        masks_fg = -np.ones((1, resolution**2), dtype=np.int32)
        labels_fg = np.zeros((1, ))
        bbox_has_mask = np.append(bbox_has_mask, 0)
    masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes)
    return bbox_fg, bbox_has_mask, masks
......@@ -90,11 +90,4 @@ def check_config(cfg):
if 'log_iter' not in cfg:
cfg.log_iter = 20
logger.debug("The 'num_classes'(number of classes) you set is {}, " \
"and 'with_background' in 'dataset' sets {}.\n" \
"So please note the actual number of categories is {}."
.format(cfg.num_classes, cfg.with_background,
cfg.num_classes + 1))
cfg.num_classes = cfg.num_classes + int(cfg.with_background)
return cfg
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册