diff --git a/configs/mask_rcnn_r50_fpn_1x.yml b/configs/mask_rcnn_r50_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..a1c90e3c02909d426d722d9b1004e9d76e6f8fbc --- /dev/null +++ b/configs/mask_rcnn_r50_fpn_1x.yml @@ -0,0 +1,144 @@ +architecture: MaskRCNN +use_gpu: true +max_iters: 180000 +log_smooth_window: 20 +save_dir: output +snapshot_iter: 10000 +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +metric: COCO +weights: output/mask_rcnn_r50_fpn_1x/model_final +num_classes: 81 +load_static_weights: True + +# Model Achitecture +MaskRCNN: + # model anchor info flow + anchor: AnchorRPN + proposal: Proposal + mask: Mask + # model feat info flow + backbone: ResNet + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + +ResNet: + # index 0 stands for res2 + depth: 50 + norm_type: bn + freeze_at: 0 + return_idx: [0,1,2,3] + num_stages: 4 + +FPN: + in_channels: [256, 512, 1024, 2048] + out_channel: 256 + min_level: 0 + max_level: 4 + spatial_scale: [0.25, 0.125, 0.0625, 0.03125] + + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 256 + feat_out: 256 + anchor_per_position: 3 + rpn_channel: 256 + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIExtractor + resolution: 7 + sampling_ratio: 2 + head_feat: + name: TwoFCHead + in_dim: 256 + mlp_dim: 1024 + in_feat: 1024 + +MaskHead: + mask_feat: + name: MaskFeat + num_convs: 4 + feat_in: 256 + feat_out: 256 + mask_roi_extractor: + name: RoIExtractor + resolution: 14 + sampling_ratio: 2 + share_bbox_feat: False + feat_in: 256 + +AnchorRPN: + anchor_generator: + name: AnchorGeneratorRPN + aspect_ratios: [0.5, 1.0, 2.0] + anchor_start_size: 32 + stride: [4., 4.] + anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 2000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 1000 + infer_post_nms_top_n: 1000 + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bg_thresh_hi: [0.5,] + bg_thresh_lo: [0.0,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + bbox_post_process: # used in infer + name: BBoxPostProcess + # decode -> clip -> nms + decode_clip_nms: + name: DecodeClipNms + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +Mask: + mask_target_generator: + name: MaskTargetGenerator + mask_resolution: 28 + mask_post_process: + name: MaskPostProcess + mask_resolution: 28 + + +# Train +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +_READER_: 'mask_reader.yml' diff --git a/configs/mask_reader.yml b/configs/mask_reader.yml index 5280abac3d10006c2b0eced7ed539c815a29fb7c..c5c486965ed09690b715406a7ff0f061c022787d 100644 --- a/configs/mask_reader.yml +++ b/configs/mask_reader.yml @@ -18,8 +18,8 @@ TrainReader: mean: [0.485,0.456,0.406] std: [0.229, 0.224,0.225] - !ResizeImage - target_size: 512 - max_size: 512 + target_size: 800 + max_size: 1333 interp: 1 use_cv2: true - !Permute @@ -39,8 +39,6 @@ TrainReader: EvalReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'im_shape'] - # for voc - #fields: 
['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult'] dataset: !COCODataSet image_dir: val2017 diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py index aaeaed58efa85e267bac81a2c9761a658e771c7b..cb823f25ed49722596aa177d8be88ea09da9c9f9 100644 --- a/ppdet/data/source/coco.py +++ b/ppdet/data/source/coco.py @@ -127,7 +127,6 @@ class COCODataSet(DataSet): if not self.load_image_only: ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) instances = coco.loadAnns(ins_anno_ids) - bboxes = [] for inst in instances: x, y, box_w, box_h = inst['bbox'] @@ -135,6 +134,7 @@ class COCODataSet(DataSet): y1 = max(0, y) x2 = min(im_w - 1, x1 + max(0, box_w - 1)) y2 = min(im_h - 1, y1 + max(0, box_h - 1)) + if inst['area'] > 0 and x2 >= x1 and y2 >= y1: inst['clean_bbox'] = [x1, y1, x2, y2] bboxes.append(inst) @@ -143,6 +143,7 @@ class COCODataSet(DataSet): 'Found an invalid bbox in annotations: im_id: {}, ' 'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format( img_id, float(inst['area']), x1, y1, x2, y2)) + num_bbox = len(bboxes) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py index 1bed5edaff9fd536cad4a78b0067c0a46fa6476e..53834443810f3982f2ac4ce6ff362aaccec89187 100644 --- a/ppdet/data/transform/batch_operators.py +++ b/ppdet/data/transform/batch_operators.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ except Exception: import logging import cv2 import numpy as np - from .operators import register_op, BaseOperator from .op_helper import jaccard_overlap, gaussian2D @@ -50,10 +49,11 @@ class PadBatch(BaseOperator): height and width is divisible by `pad_to_stride`. """ - def __init__(self, pad_to_stride=0, use_padded_im_info=True): + def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False): super(PadBatch, self).__init__() self.pad_to_stride = pad_to_stride self.use_padded_im_info = use_padded_im_info + self.pad_gt = pad_gt def __call__(self, samples, context=None): """ @@ -61,11 +61,11 @@ class PadBatch(BaseOperator): samples (list): a batch of sample, each is dict. 
""" coarsest_stride = self.pad_to_stride - if coarsest_stride == 0: - return samples + #if coarsest_stride == 0: + # return samples + max_shape = np.array([data['image'].shape for data in samples]).max( axis=0) - if coarsest_stride > 0: max_shape[1] = int( np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride) @@ -82,6 +82,52 @@ class PadBatch(BaseOperator): data['image'] = padding_im if self.use_padded_im_info: data['im_info'][:2] = max_shape[1:3] + if self.pad_gt: + gt_num = [] + if data['gt_poly'] is not None and len(data['gt_poly']) > 0: + pad_mask = True + else: + pad_mask = False + + if pad_mask: + poly_num = [] + poly_part_num = [] + point_num = [] + for data in samples: + gt_num.append(data['gt_bbox'].shape[0]) + if pad_mask: + poly_num.append(len(data['gt_poly'])) + for poly in data['gt_poly']: + poly_part_num.append(int(len(poly))) + for p_p in poly: + point_num.append(int(len(p_p) / 2)) + gt_num_max = max(gt_num) + gt_box_data = np.zeros([gt_num_max, 4]) + gt_class_data = np.zeros([gt_num_max]) + is_crowd_data = np.ones([gt_num_max]) + + if pad_mask: + poly_num_max = max(poly_num) + poly_part_num_max = max(poly_part_num) + point_num_max = max(point_num) + gt_masks_data = -np.ones( + [poly_num_max, poly_part_num_max, point_num_max, 2]) + + for i, data in enumerate(samples): + gt_num = data['gt_bbox'].shape[0] + gt_box_data[0:gt_num, :] = data['gt_bbox'] + gt_class_data[0:gt_num] = np.squeeze(data['gt_class']) + is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd']) + if pad_mask: + for j, poly in enumerate(data['gt_poly']): + for k, p_p in enumerate(poly): + pp_np = np.array(p_p).reshape(-1, 2) + gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np + data['gt_poly'] = gt_masks_data + data['gt_bbox'] = gt_box_data + data['gt_class'] = gt_class_data + data['is_crowd'] = is_crowd_data + return samples diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index db73e4174a01192fddb4f71db3dc7a51bec21504..eb9f287fa1fd1cccba897005ced288824dcc1583 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -122,7 +122,6 @@ class DecodeImage(BaseOperator): if self.to_rgb: im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) sample['image'] = im - if 'h' not in sample: sample['h'] = im.shape[0] elif sample['h'] != im.shape[0]: @@ -333,7 +332,6 @@ class ResizeImage(BaseOperator): resize_w = selected_size resize_h = selected_size - if self.use_cv2: im = cv2.resize( im, diff --git a/ppdet/modeling/__init__.py b/ppdet/modeling/__init__.py index 36b9e0d867e2aec2ee0319522ea6785ad32c6cc5..1bcf4e779b9afd4b4770f4ca530df95134c81538 100644 --- a/ppdet/modeling/__init__.py +++ b/ppdet/modeling/__init__.py @@ -2,6 +2,7 @@ from . import ops from . import bbox from . import mask from . import backbone +from . import neck from . import head from . 
import architecture @@ -9,5 +10,6 @@ from .ops import * from .bbox import * from .mask import * from .backbone import * +from .neck import * from .head import * from .architecture import * diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index 640fdd6f44199bee65008cb5beef431147467362..6880a55f989902d87aa53f7aff2bcd5ff0c880b5 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -4,7 +4,6 @@ from __future__ import print_function from paddle import fluid from ppdet.core.workspace import register -from ppdet.utils.data_structure import BufferDict from .meta_arch import BaseArch __all__ = ['MaskRCNN'] @@ -18,84 +17,107 @@ class MaskRCNN(BaseArch): 'proposal', 'mask', 'backbone', + 'neck', 'rpn_head', 'bbox_head', 'mask_head', ] - def __init__(self, anchor, proposal, mask, backbone, rpn_head, bbox_head, - mask_head, *args, **kwargs): - super(MaskRCNN, self).__init__(*args, **kwargs) + def __init__(self, + anchor, + proposal, + mask, + backbone, + rpn_head, + bbox_head, + mask_head, + neck=None): + super(MaskRCNN, self).__init__() self.anchor = anchor self.proposal = proposal self.mask = mask self.backbone = backbone + self.neck = neck self.rpn_head = rpn_head self.bbox_head = bbox_head self.mask_head = mask_head - def model_arch(self, ): + def model_arch(self): # Backbone - bb_out = self.backbone(self.gbd) - self.gbd.update(bb_out) + body_feats = self.backbone(self.inputs) + spatial_scale = None + + # Neck + if self.neck is not None: + body_feats, spatial_scale = self.neck(body_feats) # RPN - rpn_head_out = self.rpn_head(self.gbd) - self.gbd.update(rpn_head_out) + # rpn_head returns two list: rpn_feat, rpn_head_out + # each element in rpn_feats contains rpn feature on each level, + # and the length is 1 when the neck is not applied. 
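+        # the two lists are index-aligned, with one entry per feature map level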
+ # each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta) + rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats) # Anchor - anchor_out = self.anchor(self.gbd) - self.gbd.update(anchor_out) - - # Proposal BBox - self.gbd['stage'] = 0 - proposal_out = self.proposal(self.gbd) - self.gbd.update({'proposal_0': proposal_out}) + # anchor_out returns a list, + # each element contains (anchor, anchor_var) + self.anchor_out = self.anchor(rpn_feat) + # Proposal RoI + # compute targets here when training + rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out) # BBox Head - bboxhead_out = self.bbox_head(self.gbd) - self.gbd.update({'bbox_head_0': bboxhead_out}) + bbox_feat, self.bbox_head_out = self.bbox_head(body_feats, rois, + spatial_scale) + + rois_has_mask_int32 = None + if self.inputs['mode'] == 'infer': + # Refine bbox by the output from bbox_head at test stage + self.bboxes = self.proposal.post_process(self.inputs, + self.bbox_head_out, rois) + else: + # Proposal RoI for Mask branch + # bboxes update at training stage only + bbox_targets = self.proposal.get_targets()[0] + self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois, + bbox_targets) + + # Mask Head + self.mask_head_out = self.mask_head(self.inputs, body_feats, + self.bboxes, bbox_feat, + rois_has_mask_int32, spatial_scale) - if self.gbd['mode'] == 'infer': - bbox_out = self.proposal.post_process(self.gbd) - self.gbd.update(bbox_out) + def loss(self, ): + loss = {} - # Mask - mask_out = self.mask(self.gbd) - self.gbd.update(mask_out) + # RPN loss + rpn_loss_inputs = self.anchor.generate_loss_inputs( + self.inputs, self.rpn_head_out, self.anchor_out) + loss_rpn = self.rpn_head.loss(rpn_loss_inputs) + loss.update(loss_rpn) - # Mask Head - mask_head_out = self.mask_head(self.gbd) - self.gbd.update(mask_head_out) + # BBox loss + bbox_targets = self.proposal.get_targets() + loss_bbox = self.bbox_head.loss(self.bbox_head_out, bbox_targets) + loss.update(loss_bbox) - if self.gbd['mode'] == 'infer': - mask_out = self.mask.post_process(self.gbd) - self.gbd.update(mask_out) + # Mask loss + mask_targets = self.mask.get_targets() + loss_mask = self.mask_head.loss(self.mask_head_out, mask_targets) + loss.update(loss_mask) - def loss(self, ): - losses = [] - rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) - mask_loss = self.mask_head.loss(self.gbd) - losses = [ - rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss - ] - loss = fluid.layers.sum(losses) - out = { - 'loss': loss, - 'loss_rpn_cls': rpn_cls_loss, - 'loss_rpn_reg': rpn_reg_loss, - 'loss_bbox_cls': bbox_cls_loss, - 'loss_bbox_reg': bbox_reg_loss, - 'loss_mask': mask_loss - } - return out + total_loss = fluid.layers.sums(list(loss.values())) + loss.update({'loss': total_loss}) + return loss def infer(self, ): - outs = { - 'bbox': self.gbd['predicted_bbox'].numpy(), - 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), - 'mask': self.gbd['predicted_mask'].numpy(), - 'im_id': self.gbd['im_id'].numpy() + mask = self.mask.post_process(self.bboxes, self.mask_head_out, + self.inputs['im_info']) + bbox, bbox_num = self.bboxes + output = { + 'bbox': bbox.numpy(), + 'bbox_num': bbox_num.numpy(), + 'im_id': self.inputs['im_id'].numpy() } - return inputs + output.update(mask) + return output diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index 
1b0dcaa037ece9ca93d82e42450a80bce50ed68e..b758e816380876d013989b37e0f394e42974e57e 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -13,39 +13,36 @@ __all__ = ['BaseArch'] @register class BaseArch(Layer): - def __init__(self, *args, **kwargs): + def __init__(self): super(BaseArch, self).__init__() - self.args = args - self.kwargs = kwargs - - def forward(self, inputs, inputs_keys): - self.gbd = BufferDict() - self.gbd.update(self.kwargs) - assert self.gbd[ - 'mode'] is not None, "Please specify mode train or infer in config file!" - if self.kwargs['open_debug'] is None: - self.gbd['open_debug'] = False - - self.build_inputs(inputs, inputs_keys) + def forward(self, data, input_def, mode): + self.inputs = self.build_inputs(data, input_def) + self.inputs['mode'] = mode self.model_arch() - self.gbd.debug() - - if self.gbd['mode'] == 'train': + if mode == 'train': out = self.loss() - elif self.gbd['mode'] == 'infer': + elif mode == 'infer': out = self.infer() else: raise "Now, only support train or infer mode!" return out - def build_inputs(self, inputs, inputs_keys): - for i, k in enumerate(inputs_keys): - v = to_variable(np.array([x[i] for x in inputs])) - self.gbd.set(k, v) - - def model_arch(self, ): + def build_inputs(self, data, input_def): + inputs = {} + for name in input_def: + inputs[name] = [] + batch_size = len(data) + for bs in range(batch_size): + for name, input in zip(input_def, data[bs]): + input_v = np.array(input)[np.newaxis, ...] + inputs[name].append(input_v) + for name in input_def: + inputs[name] = to_variable(np.concatenate(inputs[name])) + return inputs + + def model_arch(self, mode): raise NotImplementedError("Should implement model_arch method!") def loss(self, ): diff --git a/ppdet/modeling/backbone/name_adapter.py b/ppdet/modeling/backbone/name_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..3f5ea945126f03e45570aa1e76a0e68ed0591314 --- /dev/null +++ b/ppdet/modeling/backbone/name_adapter.py @@ -0,0 +1,58 @@ +class NameAdapter(object): + """Fix the backbones variable names for pretrained weight""" + + def __init__(self, model): + super(NameAdapter, self).__init__() + self.model = model + + @property + def model_type(self): + return getattr(self.model, '_model_type', '') + + @property + def variant(self): + return getattr(self.model, 'variant', '') + + def fix_conv_norm_name(self, name): + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + # the naming rule is same as pretrained weight + if self.model_type == 'SEResNeXt': + bn_name = name + "_bn" + return bn_name + + def fix_shortcut_name(self, name): + if self.model_type == 'SEResNeXt': + name = 'conv' + name + '_prj' + return name + + def fix_bottleneck_name(self, name): + if self.model_type == 'SEResNeXt': + conv_name1 = 'conv' + name + '_x1' + conv_name2 = 'conv' + name + '_x2' + conv_name3 = 'conv' + name + '_x3' + shortcut_name = name + else: + conv_name1 = name + "_branch2a" + conv_name2 = name + "_branch2b" + conv_name3 = name + "_branch2c" + shortcut_name = name + "_branch1" + return conv_name1, conv_name2, conv_name3, shortcut_name + + def fix_layer_warp_name(self, stage_num, count, i): + name = 'res' + str(stage_num) + if count > 10 and stage_num == 4: + if i == 0: + conv_name = name + "a" + else: + conv_name = name + "b" + str(i) + else: + conv_name = name + chr(ord("a") + i) + if self.model_type == 'SEResNeXt': + conv_name = str(stage_num + 2) + '_' + str(i + 1) + return conv_name + + def 
fix_c1_stage_name(self): + return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/ppdet/modeling/backbone/resnet.py b/ppdet/modeling/backbone/resnet.py index 87417e84906190d4d7614462060cfd75eaa68755..c44098dde35f5aa7759ff983aa859cda258cd8cb 100755 --- a/ppdet/modeling/backbone/resnet.py +++ b/ppdet/modeling/backbone/resnet.py @@ -1,290 +1,320 @@ import numpy as np import paddle.fluid as fluid -from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph import Layer, Sequential from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Constant from ppdet.core.workspace import register, serializable +from paddle.fluid.regularizer import L2Decay +from .name_adapter import NameAdapter +from numbers import Integral -class ConvBNLayer(Layer): +class ConvNormLayer(Layer): def __init__(self, - name_scope, ch_in, ch_out, filter_size, stride, - padding, - act='relu', - lr=1.0): - super(ConvBNLayer, self).__init__() - - self.conv = Conv2D( - num_channels=ch_in, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=1, - act=act, - param_attr=ParamAttr( - name=name_scope + "_weights", learning_rate=lr), - bias_attr=ParamAttr(name=name_scope + "_bias")) - if name_scope == "conv1": - bn_name = "bn_" + name_scope - else: - bn_name = "bn" + name_scope[3:] - self.bn = BatchNorm( - num_channels=ch_out, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(name=bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - out = self.conv(inputs) - out = self.bn(out) - return out - - -class ConvAffineLayer(Layer): - def __init__(self, - name_scope, - ch_in, - ch_out, - filter_size, - stride, - padding, + name_adapter, + act=None, + norm_type='bn', + norm_decay=0., + freeze_norm=True, lr=1.0, - act='relu'): - super(ConvAffineLayer, self).__init__() + name=None): + super(ConvNormLayer, self).__init__() + assert norm_type in ['bn', 'affine_channel'] + self.norm_type = norm_type + self.act = act self.conv = Conv2D( num_channels=ch_in, num_filters=ch_out, filter_size=filter_size, stride=stride, - padding=padding, + padding=(filter_size - 1) // 2, + groups=1, act=None, param_attr=ParamAttr( - name=name_scope + "_weights", learning_rate=lr), + learning_rate=lr, name=name + "_weights"), bias_attr=False) - if name_scope == "conv1": - bn_name = "bn_" + name_scope - else: - bn_name = "bn" + name_scope[3:] - self.scale = fluid.layers.create_parameter( - shape=[ch_out], - dtype='float32', - attr=ParamAttr( - name=bn_name + '_scale', learning_rate=0.), - default_initializer=Constant(1.)) - - self.offset = fluid.layers.create_parameter( - shape=[ch_out], - dtype='float32', - attr=ParamAttr( - name=bn_name + '_offset', learning_rate=0.), - default_initializer=Constant(0.)) - self.act = act + bn_name = name_adapter.fix_conv_norm_name(name) + norm_lr = 0. 
if freeze_norm else lr + param_attr = ParamAttr( + learning_rate=norm_lr, + regularizer=L2Decay(norm_decay), + name=bn_name + "_scale", + trainable=False if freeze_norm else True) + bias_attr = ParamAttr( + learning_rate=norm_lr, + regularizer=L2Decay(norm_decay), + name=bn_name + "_offset", + trainable=False if freeze_norm else True) + + if norm_type in ['bn', 'sync_bn']: + global_stats = True if freeze_norm else False + self.norm = BatchNorm( + num_channels=ch_out, + act=act, + param_attr=param_attr, + bias_attr=bias_attr, + use_global_stats=global_stats, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + norm_params = self.norm.parameters() + elif norm_type == 'affine_channel': + self.scale = fluid.layers.create_parameter( + shape=[ch_out], + dtype='float32', + attr=param_attr, + default_initializer=Constant(1.)) + + self.offset = fluid.layers.create_parameter( + shape=[ch_out], + dtype='float32', + attr=bias_attr, + default_initializer=Constant(0.)) + norm_params = [self.scale, self.offset] + + if freeze_norm: + for param in norm_params: + param.stop_gradient = True def forward(self, inputs): out = self.conv(inputs) - out = fluid.layers.affine_channel( - out, scale=self.scale, bias=self.offset) - if self.act == 'relu': - out = fluid.layers.relu(out) + if self.norm_type == 'bn': + out = self.norm(out) + elif self.norm_type == 'affine_channel': + out = fluid.layers.affine_channel( + out, scale=self.scale, bias=self.offset, act=self.act) return out class BottleNeck(Layer): def __init__(self, - name_scope, ch_in, ch_out, stride, - shortcut=True, + shortcut, + name_adapter, + name, + variant='b', lr=1.0, - norm_type='bn'): + norm_type='bn', + norm_decay=0., + freeze_norm=True): super(BottleNeck, self).__init__() - self.name_scope = name_scope - if norm_type == 'bn': - atom_block = ConvBNLayer - elif norm_type == 'affine': - atom_block = ConvAffineLayer + if variant == 'a': + stride1, stride2 = stride, 1 else: - atom_block = None - assert atom_block != None, 'NormType only support BatchNorm and Affine!' 
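+            # for variants other than 'a', the stride is applied on the 3x3 conv (branch2b)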
+ stride1, stride2 = 1, stride + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = name_adapter.fix_bottleneck_name(name) self.shortcut = shortcut if not shortcut: - self.branch1 = atom_block( - name_scope + "_branch1", + self.short = ConvNormLayer( ch_in=ch_in, ch_out=ch_out * 4, filter_size=1, stride=stride, - padding=0, - act=None, - lr=lr) - - self.branch2a = atom_block( - name_scope + "_branch2a", + name_adapter=name_adapter, + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm, + lr=lr, + name=shortcut_name) + + self.branch2a = ConvNormLayer( ch_in=ch_in, ch_out=ch_out, filter_size=1, - stride=stride, - padding=0, - lr=lr) + stride=stride1, + name_adapter=name_adapter, + act='relu', + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm, + lr=lr, + name=conv_name1) - self.branch2b = atom_block( - name_scope + "_branch2b", + self.branch2b = ConvNormLayer( ch_in=ch_out, ch_out=ch_out, filter_size=3, - stride=1, - padding=1, - lr=lr) + stride=stride2, + name_adapter=name_adapter, + act='relu', + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm, + lr=lr, + name=conv_name2) - self.branch2c = atom_block( - name_scope + "_branch2c", + self.branch2c = ConvNormLayer( ch_in=ch_out, ch_out=ch_out * 4, filter_size=1, stride=1, - padding=0, + name_adapter=name_adapter, + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm, lr=lr, - act=None) + name=conv_name3) def forward(self, inputs): if self.shortcut: short = inputs else: - short = self.branch1(inputs) + short = self.short(inputs) out = self.branch2a(inputs) out = self.branch2b(out) out = self.branch2c(out) - out = fluid.layers.elementwise_add( - x=short, y=out, act='relu', name=self.name_scope + ".add.output.5") + out = fluid.layers.elementwise_add(x=short, y=out, act='relu') return out class Blocks(Layer): def __init__(self, - name_scope, ch_in, ch_out, count, - stride, + name_adapter, + stage_num, lr=1.0, - norm_type='bn'): + norm_type='bn', + norm_decay=0., + freeze_norm=True): super(Blocks, self).__init__() self.blocks = [] for i in range(count): - if i == 0: - name = name_scope + "a" - self.stride = stride - self.shortcut = False - else: - name = name_scope + chr(ord("a") + i) - self.stride = 1 - self.shortcut = True + conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i) block = self.add_sublayer( - name, + conv_name, BottleNeck( - name, ch_in=ch_in if i == 0 else ch_out * 4, ch_out=ch_out, - stride=self.stride, - shortcut=self.shortcut, + stride=2 if i == 0 and stage_num != 2 else 1, + shortcut=False if i == 0 else True, + name_adapter=name_adapter, + name=conv_name, + variant=name_adapter.variant, lr=lr, - norm_type=norm_type)) + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm)) self.blocks.append(block) - shortcut = True def forward(self, inputs): - res_out = self.blocks[0](inputs) - for block in self.blocks[1:]: - res_out = block(res_out) - return res_out + block_out = inputs + for block in self.blocks: + block_out = block(block_out) + return block_out -ResNet_cfg = {'50': [3, 4, 6, 3], '101': [3, 4, 23, 3], '152': [3, 8, 36, 3]} +ResNet_cfg = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]} @register @serializable class ResNet(Layer): - def __init__(self, depth=50, norm_type='bn', freeze_at='res2'): + def __init__(self, + depth=50, + variant='b', + lr_mult=1., + norm_type='bn', + norm_decay=0, + freeze_norm=True, + freeze_at=0, + return_idx=[0, 1, 2, 3], + num_stages=4): super(ResNet, 
self).__init__() self.depth = depth + self.variant = variant self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm self.freeze_at = freeze_at - - block_nums = ResNet_cfg[str(self.depth)] - if self.norm_type == 'bn': - atom_block = ConvBNLayer - elif self.norm_type == 'affine': - atom_block = ConvAffineLayer + if isinstance(return_idx, Integral): + return_idx = [return_idx] + assert max(return_idx) < num_stages, \ + 'the maximum return index must smaller than num_stages, ' \ + 'but received maximum return index is {} and num_stages ' \ + 'is {}'.format(max(return_idx), num_stages) + self.return_idx = return_idx + self.num_stages = num_stages + + block_nums = ResNet_cfg[depth] + na = NameAdapter(self) + + conv1_name = na.fix_c1_stage_name() + if variant in ['c', 'd']: + conv_def = [ + [3, 32, 3, 2, "conv1_1"], + [32, 32, 3, 1, "conv1_2"], + [32, 64, 3, 1, "conv1_3"], + ] else: - atom_block = None - assert atom_block != None, 'NormType only support BatchNorm and Affine!' - - self.conv1 = atom_block( - 'conv1', ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) + conv_def = [[3, 64, 7, 2, conv1_name]] + self.conv1 = Sequential() + for (c_in, c_out, k, s, _name) in conv_def: + self.conv1.add_sublayer( + _name, + ConvNormLayer( + ch_in=c_in, + ch_out=c_out, + filter_size=k, + stride=s, + name_adapter=na, + act='relu', + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm, + lr=lr_mult, + name=_name)) self.pool = Pool2D( pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) - self.stage2 = Blocks( - "res2", - ch_in=64, - ch_out=64, - count=block_nums[0], - stride=1, - norm_type=norm_type) - - self.stage3 = Blocks( - "res3", - ch_in=256, - ch_out=128, - count=block_nums[1], - stride=2, - norm_type=norm_type) - - self.stage4 = Blocks( - "res4", - ch_in=512, - ch_out=256, - count=block_nums[2], - stride=2, - norm_type=norm_type) + ch_in_list = [64, 256, 512, 1024] + ch_out_list = [64, 128, 256, 512] + + self.res_layers = [] + for i in range(num_stages): + stage_num = i + 2 + res_name = "res{}".format(stage_num) + res_layer = self.add_sublayer( + res_name, + Blocks( + ch_in_list[i], + ch_out_list[i], + count=block_nums[i], + name_adapter=na, + stage_num=stage_num, + lr=lr_mult, + norm_type=norm_type, + norm_decay=norm_decay, + freeze_norm=freeze_norm)) + self.res_layers.append(res_layer) def forward(self, inputs): x = inputs['image'] - conv1 = self.conv1(x) - - pool1 = self.pool(conv1) - - res2 = self.stage2(pool1) - - res3 = self.stage3(res2) - - res4 = self.stage4(res3) - - outs = { - 'res2': res2, - 'res3': res3, - 'res4': res4, - 'res_norm_type': self.norm_type - } - outs[self.freeze_at].stop_gradient = True + x = self.pool(conv1) + outs = [] + for idx, stage in enumerate(self.res_layers): + x = stage(x) + if idx == self.freeze_at: + x.stop_gradient = True + if idx in self.return_idx: + outs.append(x) return outs diff --git a/ppdet/modeling/bbox.py b/ppdet/modeling/bbox.py index b91f6adec72089779bde98c93ad78b1d3702508a..235f4dc55eb54bf42297e8f0768307c98419eed6 100644 --- a/ppdet/modeling/bbox.py +++ b/ppdet/modeling/bbox.py @@ -5,51 +5,65 @@ from ppdet.core.workspace import register @register class BBoxPostProcess(object): - __shared__ = ['num_classes', 'num_stages'] + __shared__ = ['num_classes'] __inject__ = ['decode_clip_nms'] def __init__(self, decode_clip_nms, num_classes=81, - num_stages=1, + cls_agnostic=False, decode=None, clip=None, - nms=None): + nms=None, + score_stage=[0, 1, 2], + delta_stage=[2]): 
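+        # score_stage / delta_stage select which cascade stages contribute scores and box deltas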
super(BBoxPostProcess, self).__init__() self.num_classes = num_classes - self.num_stages = num_stages self.decode = decode self.clip = clip self.nms = nms self.decode_clip_nms = decode_clip_nms + self.score_stage = score_stage + self.delta_stage = delta_stage + self.out_dim = 2 if cls_agnostic else num_classes + self.cls_agnostic = cls_agnostic - def __call__(self, inputs): + def __call__(self, inputs, bboxheads, rois): # TODO: split into 3 steps # TODO: modify related ops for deploying # decode # clip # nms - if self.num_stages > 0: - bbox_prob_list = [] - for i in range(self.num_stages): - bbox_prob_list.append(inputs['bbox_head_' + str(i)][ - 'bbox_prob']) - bbox_prob = fluid.layers.sum(bbox_prob_list) / float( - len(bbox_prob_list)) - bbox_delta = inputs['bbox_head_' + str(i)]['bbox_delta'] - if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2: - bbox_delta = fluid.layers.slice( - bbox_delta, axes=1, starts=[1], ends=[2]) - bbox_delta = fluid.layers.expand(bbox_delta, - [1, self.num_classes, 1]) + if isinstance(rois, tuple): + proposal, proposal_num = rois + score, delta = bboxheads[0] + bbox_prob = fluid.layers.softmax(score) + delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4)) else: - bbox_prob = inputs['bbox_prob'] - bbox_delta = inputs['bbox_delta'] - - outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta, - inputs['im_info']) - outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]} - return outs + num_stage = len(rois) + proposal_list = [] + prob_list = [] + delta_list = [] + for stage, (proposals, bboxhead) in zip(rois, bboxheads): + score, delta = bboxhead + proposal, proposal_num = proposals + if stage in self.score_stage: + bbox_prob = fluid.layers.softmax(score) + prob_list.append(bbox_prob) + if stage in self.delta_stage: + proposal_list.append(proposal) + delta_list.append(delta) + bbox_prob = fluid.layers.mean(prob_list) + delta = fluid.layers.mean(delta_list) + proposal = fluid.layers.mean(proposal_list) + delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4)) + if self.cls_agnostic: + delta = delta[:, 1:2, :] + delta = fluid.layers.expand(delta, [1, self.num_classes, 1]) + bboxes = (proposal, proposal_num) + bboxes, bbox_nums = self.decode_clip_nms(bboxes, bbox_prob, delta, + inputs['im_info']) + return bboxes, bbox_nums @register @@ -97,36 +111,51 @@ class AnchorRPN(object): self.anchor_generator = anchor_generator self.anchor_target_generator = anchor_target_generator - def __call__(self, inputs): - outs = self.generate_anchors(inputs) - return outs - - def generate_anchors(self, inputs): - # TODO: update here to use int to specify featmap size - outs = self.anchor_generator(inputs['rpn_feat']) - outs = {'anchor': outs[0], 'anchor_var': outs[1], 'anchor_module': self} - return outs - - def generate_anchors_target(self, inputs): - rpn_rois_score = fluid.layers.transpose( - inputs['rpn_rois_score'], perm=[0, 2, 3, 1]) - rpn_rois_delta = fluid.layers.transpose( - inputs['rpn_rois_delta'], perm=[0, 2, 3, 1]) - rpn_rois_score = fluid.layers.reshape( - x=rpn_rois_score, shape=(0, -1, 1)) - rpn_rois_delta = fluid.layers.reshape( - x=rpn_rois_delta, shape=(0, -1, 4)) - - anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4)) + def __call__(self, rpn_feats): + anchors = [] + num_level = len(rpn_feats) + for i, rpn_feat in enumerate(rpn_feats): + anchor, var = self.anchor_generator(rpn_feat, i) + anchors.append((anchor, var)) + return anchors + + def _get_target_input(self, rpn_feats, anchors): + rpn_score_list = [] + 
rpn_delta_list = [] + anchor_list = [] + for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_feats, anchors): + rpn_score = fluid.layers.transpose(rpn_score, perm=[0, 2, 3, 1]) + rpn_delta = fluid.layers.transpose(rpn_delta, perm=[0, 2, 3, 1]) + rpn_score = fluid.layers.reshape(x=rpn_score, shape=(0, -1, 1)) + rpn_delta = fluid.layers.reshape(x=rpn_delta, shape=(0, -1, 4)) + + anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) + var = fluid.layers.reshape(var, shape=(-1, 4)) + + rpn_score_list.append(rpn_score) + rpn_delta_list.append(rpn_delta) + anchor_list.append(anchor) + + rpn_scores = fluid.layers.concat(rpn_score_list, axis=1) + rpn_deltas = fluid.layers.concat(rpn_delta_list, axis=1) + anchors = fluid.layers.concat(anchor_list) + return rpn_scores, rpn_deltas, anchors + + def generate_loss_inputs(self, inputs, rpn_head_out, anchors): + assert len(rpn_head_out) == len( + anchors + ), "rpn_head_out and anchors should have same length, but received rpn_head_out' length is {} and anchors' length is {}".format( + len(rpn_head_out), len(anchors)) + rpn_score, rpn_delta, anchors = self._get_target_input(rpn_head_out, + anchors) score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator( - bbox_pred=rpn_rois_delta, - cls_logits=rpn_rois_score, - anchor_box=anchor, + bbox_pred=rpn_delta, + cls_logits=rpn_score, + anchor_box=anchors, gt_boxes=inputs['gt_bbox'], is_crowd=inputs['is_crowd'], - im_info=inputs['im_info'], - open_debug=inputs['open_debug']) + im_info=inputs['im_info']) outs = { 'rpn_score_pred': score_pred, 'rpn_score_target': score_tgt, @@ -180,86 +209,107 @@ class Proposal(object): self.proposal_target_generator = proposal_target_generator self.bbox_post_process = bbox_post_process - def __call__(self, inputs): - outs = {} - if inputs['stage'] == 0: - proposal_out = self.generate_proposal(inputs) - inputs.update(proposal_out) - if inputs['mode'] == 'train': - proposal_target_out = self.generate_proposal_target(inputs) - outs.update(proposal_target_out) - return outs - - def generate_proposal(self, inputs): - rpn_rois_prob = fluid.layers.sigmoid( - inputs['rpn_rois_score'], name='rpn_rois_prob') - outs = self.proposal_generator( - scores=rpn_rois_prob, - bbox_deltas=inputs['rpn_rois_delta'], - anchors=inputs['anchor'], - variances=inputs['anchor_var'], - im_info=inputs['im_info'], - mode=inputs['mode']) - outs = { - 'rpn_rois': outs[0], - 'rpn_rois_probs': outs[1], - 'rpn_rois_nums': outs[2] - } - return outs - - def generate_proposal_target(self, inputs): - if inputs['stage'] == 0: - rois = inputs['rpn_rois'] - rois_num = inputs['rpn_rois_nums'] - elif inputs['stage'] > 0: - last_proposal_out = inputs['proposal_' + str(inputs['stage'] - 1)] - rois = last_proposal_out['refined_bbox'] - rois_num = last_proposal_out['rois_nums'] - + def generate_proposal(self, inputs, rpn_head_out, anchor_out): + rpn_rois_list = [] + rpn_prob_list = [] + rpn_rois_num_list = [] + for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_head_out, + anchor_out): + rpn_prob = fluid.layers.sigmoid(rpn_score) + rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = self.proposal_generator( + scores=rpn_prob, + bbox_deltas=rpn_delta, + anchors=anchor, + variances=var, + im_info=inputs['im_info'], + mode=inputs['mode']) + if len(rpn_head_out) == 1: + return rpn_rois, rpn_rois_num + rpn_rois_list.append(rpn_rois) + rpn_prob_list.append(rpn_rois_prob) + rpn_rois_num_list.append(rpn_rois_num) + + start_level = 2 + end_level = start_level + len(rpn_head_out) + rois_collect, 
rois_num_collect = fluid.layers.collect_fpn_proposals( + rpn_rois_list, + rpn_prob_list, + start_level, + end_level, + post_nms_top_n, + rois_num_per_level=rpn_rois_num_list) + return rois_collect, rois_num_collect + + def generate_proposal_target(self, inputs, rois, rois_num, stage=0): outs = self.proposal_target_generator( rpn_rois=rois, - rpn_rois_nums=rois_num, + rpn_rois_num=rois_num, gt_classes=inputs['gt_class'], is_crowd=inputs['is_crowd'], gt_boxes=inputs['gt_bbox'], im_info=inputs['im_info'], - stage=inputs['stage'], - open_debug=inputs['open_debug']) - outs = { - 'rois': outs[0], + stage=stage) + rois = outs[0] + rois_num = outs[-1] + targets = { 'labels_int32': outs[1], 'bbox_targets': outs[2], 'bbox_inside_weights': outs[3], - 'bbox_outside_weights': outs[4], - 'rois_nums': outs[5] + 'bbox_outside_weights': outs[4] } - return outs - - def refine_bbox(self, inputs): - if inputs['mode'] == 'train': - rois = inputs['proposal_' + str(inputs['stage'])]['rois'] - else: - rois = inputs['rpn_rois'] - bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])] + return rois, rois_num, targets - bbox_delta_r = fluid.layers.reshape( - bbox_head_out['bbox_delta'], - (-1, inputs['bbox_head_0']['cls_agnostic_bbox_reg'], 4)) + def refine_bbox(self, rois, bbox_delta, stage=0): + out_dim = bbox_delta.shape[1] / 4 + bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4)) bbox_delta_s = fluid.layers.slice( bbox_delta_r, axes=[1], starts=[1], ends=[2]) refined_bbox = fluid.layers.box_coder( prior_box=rois, prior_box_var=self.proposal_target_generator.bbox_reg_weights[ - inputs['stage']], + stage], target_box=bbox_delta_s, code_type='decode_center_size', box_normalized=False, axis=1) refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) - outs = {'refined_bbox': refined_bbox} - return outs + return refined_bbox + + def __call__(self, + inputs, + rpn_head_out, + anchor_out, + stage=0, + proposal_out=None, + bbox_head_outs=None, + refined=False): + if refined: + assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None." 
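+            # proposals were already refined by an earlier stage, so reuse them directly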
+ return proposal_out + if stage == 0: + roi, rois_num = self.generate_proposal(inputs, rpn_head_out, + anchor_out) + self.proposals_list = [] + self.targets_list = [] - def post_process(self, inputs): - outs = self.bbox_post_process(inputs) - return outs + else: + bbox_delta = bbox_head_outs[stage][0] + roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1) + rois_num = proposal_out[1] + if inputs['mode'] == 'train': + roi, rois_num, targets = self.generate_proposal_target( + inputs, roi, rois_num, stage) + self.targets_list.append(targets) + self.proposals_list.append((roi, rois_num)) + return roi, rois_num + + def get_targets(self): + return self.targets_list + + def get_proposals(self): + return self.proposals_list + + def post_process(self, inputs, bbox_head_out, rois): + bboxes = self.bbox_post_process(inputs, bbox_head_out, rois) + return bboxes diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py index d1fe38937378d6dd645e165358516bd8ae2af0a9..af768a2e6a90729cdb36db6afb010ff159b60772 100644 --- a/ppdet/modeling/head/bbox_head.py +++ b/ppdet/modeling/head/bbox_head.py @@ -1,155 +1,171 @@ import paddle.fluid as fluid from paddle.fluid.dygraph import Layer from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, MSRA +from paddle.fluid.initializer import Normal, Xavier from paddle.fluid.regularizer import L2Decay -from paddle.fluid.dygraph.nn import Conv2D, Pool2D +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear from ppdet.core.workspace import register -# TODO: del import and use inject -from ..backbone.resnet import Blocks @register -class BBoxFeat(Layer): - __inject__ = ['roi_extractor'] +class TwoFCHead(Layer): + __shared__ = ['num_stages'] - def __init__(self, roi_extractor, feat_in=1024, feat_out=512, num_stages=1): + def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1): + super(TwoFCHead, self).__init__() + self.in_dim = in_dim + self.mlp_dim = mlp_dim + self.num_stages = num_stages + fan = in_dim * resolution * resolution + self.fc6_list = [] + self.fc7_list = [] + for stage in range(num_stages): + fc6_name = 'fc6_{}'.format(stage) + fc7_name = 'fc7_{}'.format(stage) + fc6 = self.add_sublayer( + fc6_name, + Linear( + in_dim * resolution * resolution, + mlp_dim, + act='relu', + param_attr=ParamAttr( + #name='fc6_w', + initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr( + #name='fc6_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + fc7 = self.add_sublayer( + fc7_name, + Linear( + mlp_dim, + mlp_dim, + act='relu', + param_attr=ParamAttr( + #name='fc7_w', + initializer=Xavier()), + bias_attr=ParamAttr( + #name='fc7_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + self.fc6_list.append(fc6) + self.fc7_list.append(fc7) + + def forward(self, rois_feat, stage=0): + rois_feat = fluid.layers.flatten(rois_feat) + fc6 = self.fc6_list[stage](rois_feat) + fc7 = self.fc7_list[stage](fc6) + return fc7 + + +@register +class BBoxFeat(Layer): + __inject__ = ['roi_extractor', 'head_feat'] + + def __init__(self, roi_extractor, head_feat): super(BBoxFeat, self).__init__() self.roi_extractor = roi_extractor - self.num_stages = num_stages - self.res5s = [] - for i in range(self.num_stages): - if i == 0: - postfix = '' - else: - postfix = '_' + str(i) - # TODO: set norm type - res5 = Blocks( - "res5" + postfix, - ch_in=feat_in, - ch_out=feat_out, - count=3, - stride=2) - self.res5s.append(res5) - self.res5_pool = fluid.dygraph.Pool2D( - pool_type='avg', global_pooling=True) - - def 
forward(self, inputs): - - if inputs['mode'] == 'train': - in_rois = inputs['proposal_' + str(inputs['stage'])] - rois = in_rois['rois'] - rois_num = in_rois['rois_nums'] - elif inputs['mode'] == 'infer': - rois = inputs['rpn_rois'] - rois_num = inputs['rpn_rois_nums'] - else: - raise "BBoxFeat only support train or infer mode!" - - rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num) - # TODO: add others - y_res5 = self.res5s[inputs['stage']](rois_feat) - y = self.res5_pool(y_res5) - y = fluid.layers.squeeze(y, axes=[2, 3]) - outs = { - 'rois_feat': rois_feat, - 'res5': y_res5, - "bbox_feat": y, - 'shared_res5_block': self.res5s[inputs['stage']], - 'shared_roi_extractor': self.roi_extractor - } - return outs + self.head_feat = head_feat + + def forward(self, body_feats, rois, spatial_scale, stage=0): + rois_feat = self.roi_extractor(body_feats, rois, spatial_scale) + bbox_feat = self.head_feat(rois_feat, stage) + return bbox_feat @register class BBoxHead(Layer): - __inject__ = ['bbox_feat'] __shared__ = ['num_classes', 'num_stages'] + __inject__ = ['bbox_feat'] def __init__(self, bbox_feat, - feat_in=2048, + in_feat=1024, num_classes=81, - cls_agnostic_bbox_reg=81, - num_stages=1): + cls_agnostic=False, + num_stages=1, + with_pool=False): super(BBoxHead, self).__init__() - self.bbox_feat = bbox_feat self.num_classes = num_classes - self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg + self.delta_dim = 2 if cls_agnostic else num_classes + self.bbox_feat = bbox_feat self.num_stages = num_stages - - self.bbox_scores = [] - self.bbox_deltas = [] - for i in range(self.num_stages): - if i == 0: - postfix = '' - else: - postfix = '_' + str(i) - bbox_score = fluid.dygraph.Linear( - input_dim=feat_in, - output_dim=1 * self.num_classes, - act=None, - param_attr=ParamAttr( - name='cls_score_w' + postfix, - initializer=Normal( - loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='cls_score_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) - - bbox_delta = fluid.dygraph.Linear( - input_dim=feat_in, - output_dim=4 * self.cls_agnostic_bbox_reg, - act=None, - param_attr=ParamAttr( - name='bbox_pred_w' + postfix, - initializer=Normal( - loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='bbox_pred_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) - self.bbox_scores.append(bbox_score) - self.bbox_deltas.append(bbox_delta) - - def forward(self, inputs): - outs = self.bbox_feat(inputs) - x = outs['bbox_feat'] - bs = self.bbox_scores[inputs['stage']](x) - bd = self.bbox_deltas[inputs['stage']](x) - outs.update({'bbox_score': bs, 'bbox_delta': bd}) - if inputs['stage'] == 0: - outs.update({"cls_agnostic_bbox_reg": self.cls_agnostic_bbox_reg}) - if inputs['mode'] == 'infer': - bbox_prob = fluid.layers.softmax(bs, use_cudnn=False) - outs['bbox_prob'] = bbox_prob - return outs - - def loss(self, inputs): - bbox_out = inputs['bbox_head_' + str(inputs['stage'])] - bbox_target = inputs['proposal_' + str(inputs['stage'])] - + self.bbox_score_list = [] + self.bbox_delta_list = [] + self.with_pool = with_pool + for stage in range(num_stages): + score_name = 'bbox_score_{}'.format(stage) + delta_name = 'bbox_delta_{}'.format(stage) + bbox_score = self.add_sublayer( + score_name, + fluid.dygraph.Linear( + input_dim=in_feat, + output_dim=1 * self.num_classes, + act=None, + param_attr=ParamAttr( + #name='cls_score_w', + initializer=Normal( + loc=0.0, scale=0.01)), + bias_attr=ParamAttr( + #name='cls_score_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + + bbox_delta = 
self.add_sublayer( + delta_name, + fluid.dygraph.Linear( + input_dim=in_feat, + output_dim=4 * self.delta_dim, + act=None, + param_attr=ParamAttr( + #name='bbox_pred_w', + initializer=Normal( + loc=0.0, scale=0.001)), + bias_attr=ParamAttr( + #name='bbox_pred_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + self.bbox_score_list.append(bbox_score) + self.bbox_delta_list.append(bbox_delta) + + def forward(self, body_feats, rois, spatial_scale, stage=0): + bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage) + if self.with_pool: + bbox_feat = fluid.layers.pool2d( + bbox_feat, pool_type='avg', global_pooling=True) + bbox_head_out = [] + scores = self.bbox_score_list[stage](bbox_feat) + deltas = self.bbox_delta_list[stage](bbox_feat) + bbox_head_out.append((scores, deltas)) + return bbox_feat, bbox_head_out + + def _get_head_loss(self, score, delta, target): # bbox cls labels_int64 = fluid.layers.cast( - x=bbox_target['labels_int32'], dtype='int64') + x=target['labels_int32'], dtype='int64') labels_int64.stop_gradient = True - bbox_score = fluid.layers.reshape(bbox_out['bbox_score'], - (-1, self.num_classes)) loss_bbox_cls = fluid.layers.softmax_with_cross_entropy( - logits=bbox_score, label=labels_int64) - loss_bbox_cls = fluid.layers.reduce_mean( - loss_bbox_cls, name='loss_bbox_cls_' + str(inputs['stage'])) - + logits=score, label=labels_int64) + loss_bbox_cls = fluid.layers.reduce_mean(loss_bbox_cls) # bbox reg loss_bbox_reg = fluid.layers.smooth_l1( - x=bbox_out['bbox_delta'], - y=bbox_target['bbox_targets'], - inside_weight=bbox_target['bbox_inside_weights'], - outside_weight=bbox_target['bbox_outside_weights'], + x=delta, + y=target['bbox_targets'], + inside_weight=target['bbox_inside_weights'], + outside_weight=target['bbox_outside_weights'], sigma=1.0) - loss_bbox_reg = fluid.layers.reduce_mean( - loss_bbox_reg, name='loss_bbox_loc_' + str(inputs['stage'])) - + loss_bbox_reg = fluid.layers.reduce_mean(loss_bbox_reg) return loss_bbox_cls, loss_bbox_reg + + def loss(self, bbox_head_out, targets): + loss_bbox = {} + for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)): + score, delta = bboxhead + cls_name = 'loss_bbox_cls_{}'.format(lvl) + reg_name = 'loss_bbox_reg_{}'.format(lvl) + loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta, + target) + loss_bbox[cls_name] = loss_bbox_cls + loss_bbox[reg_name] = loss_bbox_reg + return loss_bbox diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index c65020f05b22898729dc0985857e46b09acb7715..3ab92daa4096761987db9b2b52965eb3da72fa67 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -1,127 +1,190 @@ import paddle.fluid as fluid -from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph import Layer, Sequential from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, MSRA +from paddle.fluid.initializer import MSRA from paddle.fluid.regularizer import L2Decay -from paddle.fluid.dygraph.nn import Conv2D, Pool2D +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Conv2DTranspose from ppdet.core.workspace import register -# TODO: del it and use inject -from ..backbone.resnet import Blocks @register class MaskFeat(Layer): - def __init__(self, feat_in=2048, feat_out=256, mask_stages=1): + __inject__ = ['mask_roi_extractor'] + + def __init__(self, + mask_roi_extractor, + num_convs=1, + feat_in=2048, + feat_out=256, + mask_num_stages=1, + share_bbox_feat=False): super(MaskFeat, self).__init__() + 
self.num_convs = num_convs self.feat_in = feat_in self.feat_out = feat_out - self.mask_stages = mask_stages - - for i in range(self.mask_stages): - if i == 0: - postfix = '' - else: - postfix = '_' + str(i) - self.upsample = fluid.dygraph.Conv2DTranspose( - num_channels=self.feat_in, - num_filters=self.feat_out, - filter_size=2, - stride=2, - act='relu', - param_attr=ParamAttr( - name='conv5_mask_w' + postfix, - initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name='conv5_mask_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) - - def forward(self, inputs): - bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])] - if inputs['mode'] == 'train': - x = bbox_head_out['res5'] - rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32']) - elif inputs['mode'] == 'infer': - rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2] - rois_num = inputs['predicted_bbox_nums'] - # TODO: optim here - shared_roi_ext = bbox_head_out['shared_roi_extractor'] - if callable(shared_roi_ext): - rois_feat = shared_roi_ext(inputs['res4'], rois, rois_num) - - shared_res5 = bbox_head_out['shared_res5_block'] - if callable(shared_res5): - rois_feat = shared_res5(rois_feat) - + self.mask_roi_extractor = mask_roi_extractor + self.mask_num_stages = mask_num_stages + self.share_bbox_feat = share_bbox_feat + self.upsample_module = [] + fan_conv = feat_out * 3 * 3 + fan_deconv = feat_out * 2 * 2 + for i in range(self.mask_num_stages): + name = 'stage_{}'.format(i) + mask_conv = Sequential() + for j in range(self.num_convs): + conv_name = 'mask_inter_feat_{}'.format(j + 1) + mask_conv.add_sublayer( + conv_name, + Conv2D( + num_channels=feat_in if j == 1 else feat_out, + num_filters=feat_out, + filter_size=3, + act='relu', + padding=1, + param_attr=ParamAttr( + #name=conv_name+'_w', + initializer=MSRA( + uniform=False, fan_in=fan_conv)), + bias_attr=ParamAttr( + #name=conv_name+'_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + mask_conv.add_sublayer( + 'conv5_mask', + Conv2DTranspose( + num_channels=self.feat_in, + num_filters=self.feat_out, + filter_size=2, + stride=2, + act='relu', + param_attr=ParamAttr( + #name='conv5_mask_w', + initializer=MSRA( + uniform=False, fan_in=fan_deconv)), + bias_attr=ParamAttr( + #name='conv5_mask_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + upsample = self.add_sublayer(name, mask_conv) + self.upsample_module.append(upsample) + + def forward(self, + body_feats, + bboxes, + bbox_feat, + mask_index, + spatial_scale, + stage=0): + if self.share_bbox_feat: + rois_feat = fluid.layers.gather(bbox_feat, mask_index) + else: + rois_feat = self.mask_roi_extractor(body_feats, bboxes, + spatial_scale) # upsample - y = self.upsample(rois_feat) - outs = {'mask_feat': y} - return outs + mask_feat = self.upsample_module[stage](rois_feat) + return mask_feat @register class MaskHead(Layer): - __shared__ = ['num_classes'] + __shared__ = ['num_classes', 'mask_num_stages'] __inject__ = ['mask_feat'] def __init__(self, mask_feat, - num_classes=81, feat_in=256, - resolution=14, - mask_stages=1): + num_classes=81, + mask_num_stages=1): super(MaskHead, self).__init__() self.mask_feat = mask_feat self.feat_in = feat_in - self.resolution = resolution self.num_classes = num_classes - self.mask_stages = mask_stages - - for i in range(self.mask_stages): - if i == 0: - postfix = '' - else: - postfix = '_' + str(i) - self.mask_fcn_logits = fluid.dygraph.Conv2D( - num_channels=self.feat_in, - num_filters=self.num_classes, - filter_size=1, - param_attr=ParamAttr( - 
name='mask_fcn_logits_w' + postfix, - initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name='mask_fcn_logits_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.0))) - - def forward(self, inputs): - # feat - outs = self.mask_feat(inputs) - x = outs['mask_feat'] - # logits - mask_logits = self.mask_fcn_logits(x) - if inputs['mode'] == 'infer': - pred_bbox = inputs['predicted_bbox'] - shape = reduce((lambda x, y: x * y), pred_bbox.shape) - shape = np.asarray(shape).reshape((1, 1)) - ones = np.ones((1, 1), dtype=np.int32) - cond = (shape == ones).all() - if cond: - mask_logits = pred_bbox - - outs['mask_logits'] = mask_logits - - return outs - - def loss(self, inputs): - reshape_dim = self.num_classes * self.resolution * self.resolution - mask_logits = fluid.layers.reshape(inputs['mask_logits'], - (-1, reshape_dim)) - mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32') + self.mask_num_stages = mask_num_stages + self.mask_fcn_logits = [] + for i in range(self.mask_num_stages): + name = 'mask_fcn_logits_{}'.format(i) + self.mask_fcn_logits.append( + self.add_sublayer( + name, + fluid.dygraph.Conv2D( + num_channels=self.feat_in, + num_filters=self.num_classes, + filter_size=1, + param_attr=ParamAttr( + #name='mask_fcn_logits_w', + initializer=MSRA( + uniform=False, fan_in=self.num_classes)), + bias_attr=ParamAttr( + #name='mask_fcn_logits_b', + learning_rate=2., + regularizer=L2Decay(0.0))))) + + def forward_train(self, + body_feats, + bboxes, + bbox_feat, + mask_index, + spatial_scale, + stage=0): + # feat + mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index, + spatial_scale, stage) + # logits + mask_head_out = self.mask_fcn_logits[stage](mask_feat) + return mask_head_out + + def forward_test(self, + im_info, + body_feats, + bboxes, + bbox_feat, + mask_index, + spatial_scale, + stage=0): + bbox, bbox_num = bboxes + if bbox.shape[0] == 0: + mask_head_out = bbox + else: + im_info_expand = [] + for idx, num in enumerate(bbox_num): + for n in range(num): + im_info_expand.append(im_info[idx, -1]) + im_info_expand = fluid.layers.concat(im_info_expand) + scaled_bbox = fluid.layers.elementwise_mul( + bbox[:, 2:], im_info_expand, axis=0) + scaled_bboxes = (scaled_bbox, bbox_num) + mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat, + mask_index, spatial_scale, stage) + mask_logit = self.mask_fcn_logits[stage](mask_feat) + mask_head_out = fluid.layers.sigmoid(mask_logit) + return mask_head_out + + def forward(self, + inputs, + body_feats, + bboxes, + bbox_feat, + mask_index, + spatial_scale, + stage=0): + if inputs['mode'] == 'train': + mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat, + mask_index, spatial_scale, stage) + else: + im_info = inputs['im_info'] + mask_head_out = self.forward_test(im_info, body_feats, bboxes, + bbox_feat, mask_index, + spatial_scale, stage) + return mask_head_out + + def loss(self, mask_head_out, mask_target): + mask_logits = fluid.layers.flatten(mask_head_out) + mask_label = fluid.layers.cast(x=mask_target, dtype='float32') + mask_label.stop_gradient = True loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits( x=mask_logits, label=mask_label, ignore_index=-1, normalize=True) - loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask') + loss_mask = fluid.layers.reduce_sum(loss_mask) - return loss_mask + return {'loss_mask': loss_mask} diff --git a/ppdet/modeling/head/rpn_head.py b/ppdet/modeling/head/rpn_head.py index 
219c08747d8d797fb4b0e00fe5a2e49ce5d9f093..0f83ffc9e927f2d7e39ab79f576c85c5ea444ca0 100644 --- a/ppdet/modeling/head/rpn_head.py +++ b/ppdet/modeling/head/rpn_head.py @@ -11,95 +11,105 @@ from ppdet.core.workspace import register class RPNFeat(Layer): def __init__(self, feat_in=1024, feat_out=1024): super(RPNFeat, self).__init__() + # rpn feat is shared with each level self.rpn_conv = Conv2D( - num_channels=1024, - num_filters=1024, + num_channels=feat_in, + num_filters=feat_out, filter_size=3, - stride=1, padding=1, act='relu', param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal( + #name="conv_rpn_fpn2_w", + initializer=Normal( loc=0., scale=0.01)), bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + #name="conv_rpn_fpn2_b", + learning_rate=2., + regularizer=L2Decay(0.))) - def forward(self, inputs): - x = inputs.get('res4') - y = self.rpn_conv(x) - outs = {'rpn_feat': y} - return outs + def forward(self, inputs, feats): + rpn_feats = [] + for feat in feats: + rpn_feats.append(self.rpn_conv(feat)) + return rpn_feats @register class RPNHead(Layer): __inject__ = ['rpn_feat'] - def __init__(self, rpn_feat, anchor_per_position=15): + def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024): super(RPNHead, self).__init__() self.rpn_feat = rpn_feat - self.anchor_per_position = anchor_per_position - + if isinstance(rpn_feat, dict): + self.rpn_feat = RPNFeat(**rpn_feat) + # rpn head is shared with each level # rpn roi classification scores self.rpn_rois_score = Conv2D( - num_channels=1024, - num_filters=1 * self.anchor_per_position, + num_channels=rpn_channel, + num_filters=anchor_per_position, filter_size=1, - stride=1, padding=0, act=None, param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal( + #name="rpn_cls_logits_fpn2_w", + initializer=Normal( loc=0., scale=0.01)), bias_attr=ParamAttr( - name="rpn_cls_logits_b", + #name="rpn_cls_logits_fpn2_b", learning_rate=2., regularizer=L2Decay(0.))) # rpn roi bbox regression deltas self.rpn_rois_delta = Conv2D( - num_channels=1024, - num_filters=4 * self.anchor_per_position, + num_channels=rpn_channel, + num_filters=4 * anchor_per_position, filter_size=1, - stride=1, padding=0, act=None, param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal( + #name="rpn_bbox_pred_fpn2_w", + initializer=Normal( loc=0., scale=0.01)), bias_attr=ParamAttr( - name="rpn_bbox_pred_b", + #name="rpn_bbox_pred_fpn2_b", learning_rate=2., regularizer=L2Decay(0.))) - def forward(self, inputs): - outs = self.rpn_feat(inputs) - x = outs['rpn_feat'] - rrs = self.rpn_rois_score(x) - rrd = self.rpn_rois_delta(x) - outs.update({'rpn_rois_score': rrs, 'rpn_rois_delta': rrd}) - return outs + def forward(self, inputs, feats): + rpn_feats = self.rpn_feat(inputs, feats) + rpn_head_out = [] + for rpn_feat in rpn_feats: + rrs = self.rpn_rois_score(rpn_feat) + rrd = self.rpn_rois_delta(rpn_feat) + rpn_head_out.append((rrs, rrd)) + return rpn_feats, rpn_head_out - def loss(self, inputs): - if callable(inputs['anchor_module']): - rpn_targets = inputs['anchor_module'].generate_anchors_target( - inputs) + def loss(self, loss_inputs): # cls loss score_tgt = fluid.layers.cast( - x=rpn_targets['rpn_score_target'], dtype='float32') - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=rpn_targets['rpn_score_pred'], label=score_tgt) - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') + x=loss_inputs['rpn_score_target'], dtype='float32') + score_tgt.stop_gradient = True + 
loss_rpn_cls = fluid.layers.sigmoid_cross_entropy_with_logits( + x=loss_inputs['rpn_score_pred'], label=score_tgt) + loss_rpn_cls = fluid.layers.reduce_mean( + loss_rpn_cls, name='loss_rpn_cls') # reg loss - rpn_reg_loss = fluid.layers.smooth_l1( - x=rpn_targets['rpn_rois_pred'], - y=rpn_targets['rpn_rois_target'], + loc_tgt = fluid.layers.cast( + x=loss_inputs['rpn_rois_target'], dtype='float32') + loc_tgt.stop_gradient = True + loss_rpn_reg = fluid.layers.smooth_l1( + x=loss_inputs['rpn_rois_pred'], + y=loc_tgt, sigma=3.0, - inside_weight=rpn_targets['rpn_rois_weight'], - outside_weight=rpn_targets['rpn_rois_weight']) - rpn_reg_loss = fluid.layers.reduce_mean( - rpn_reg_loss, name='loss_rpn_reg') + inside_weight=loss_inputs['rpn_rois_weight'], + outside_weight=loss_inputs['rpn_rois_weight']) + loss_rpn_reg = fluid.layers.reduce_sum(loss_rpn_reg) + score_shape = fluid.layers.shape(score_tgt) + score_shape = fluid.layers.cast(x=score_shape, dtype='float32') + norm = fluid.layers.reduce_prod(score_shape) + norm.stop_gradient = True + loss_rpn_reg = loss_rpn_reg / norm - return rpn_cls_loss, rpn_reg_loss + return {'loss_rpn_cls': loss_rpn_cls, 'loss_rpn_reg': loss_rpn_reg} diff --git a/ppdet/modeling/mask.py b/ppdet/modeling/mask.py index b7122c5b597b5d2f862047f83d32060ac87d851c..e8dcf20e3d9798f235aaa643be75acc8523860d6 100644 --- a/ppdet/modeling/mask.py +++ b/ppdet/modeling/mask.py @@ -8,20 +8,22 @@ from ppdet.py_op.post_process import mask_post_process @register class MaskPostProcess(object): - __shared__ = ['num_classes'] + __shared__ = ['mask_resolution'] - def __init__(self, num_classes=81): + def __init__(self, mask_resolution=28, binary_thresh=0.5): super(MaskPostProcess, self).__init__() - self.num_classes = num_classes + self.mask_resolution = mask_resolution + self.binary_thresh = binary_thresh - def __call__(self, inputs): + def __call__(self, bboxes, mask_head_out, im_info): # TODO: modify related ops for deploying - outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(), - inputs['predicted_bbox'].numpy(), - inputs['mask_logits'].numpy(), - inputs['im_info'].numpy()) - outs = {'predicted_mask': outs} - return outs + bboxes_np = (i.numpy() for i in bboxes) + mask = mask_post_process(bboxes_np, + mask_head_out.numpy(), + im_info.numpy(), self.mask_resolution, + self.binary_thresh) + mask = {'mask': mask} + return mask @register @@ -33,29 +35,28 @@ class Mask(object): self.mask_target_generator = mask_target_generator self.mask_post_process = mask_post_process - def __call__(self, inputs): - outs = {} - if inputs['mode'] == 'train': - outs = self.generate_mask_target(inputs) - return outs + def __call__(self, inputs, rois, targets): + mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois, + targets) + return mask_rois, rois_has_mask_int32 - def generate_mask_target(self, inputs): - proposal_out = inputs['proposal_' + str(inputs['stage'])] - outs = self.mask_target_generator( + def generate_mask_target(self, inputs, rois, targets): + labels_int32 = targets['labels_int32'] + proposals, proposals_num = rois + mask_rois, mask_rois_num, self.rois_has_mask_int32, self.mask_int32 = self.mask_target_generator( im_info=inputs['im_info'], gt_classes=inputs['gt_class'], is_crowd=inputs['is_crowd'], gt_segms=inputs['gt_mask'], - rois=proposal_out['rois'], - rois_nums=proposal_out['rois_nums'], - labels_int32=proposal_out['labels_int32']) - outs = { - 'mask_rois': outs[0], - 'rois_has_mask_int32': outs[1], - 'mask_int32': outs[2] - } - return outs - - def 
post_process(self, inputs): - outs = self.mask_post_process(inputs) - return outs + rois=proposals, + rois_num=proposals_num, + labels_int32=labels_int32) + self.mask_rois = (mask_rois, mask_rois_num) + return self.mask_rois, self.rois_has_mask_int32 + + def get_targets(self): + return self.mask_int32 + + def post_process(self, bboxes, mask_head_out, im_info): + mask = self.mask_post_process(bboxes, mask_head_out, im_info) + return mask diff --git a/ppdet/modeling/neck/__init__.py b/ppdet/modeling/neck/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4991079b28f1d163474ebc3196598299f45d629c --- /dev/null +++ b/ppdet/modeling/neck/__init__.py @@ -0,0 +1,3 @@ +from . import fpn + +from .fpn import * diff --git a/ppdet/modeling/neck/fpn.py b/ppdet/modeling/neck/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..321335f457f9c958cb072a59fcf72dd87e0ed1df --- /dev/null +++ b/ppdet/modeling/neck/fpn.py @@ -0,0 +1,85 @@ +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Xavier +from paddle.fluid.regularizer import L2Decay +from ppdet.core.workspace import register, serializable + + +@register +@serializable +class FPN(Layer): + def __init__(self, + in_channels, + out_channel, + min_level=0, + max_level=4, + spatial_scale=[0.25, 0.125, 0.0625, 0.03125]): + + super(FPN, self).__init__() + self.lateral_convs = [] + self.fpn_convs = [] + fan = out_channel * 3 * 3 + + for i in range(min_level, max_level): + if i == 3: + lateral_name = 'fpn_inner_res5_sum' + else: + lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2) + in_c = in_channels[i] + lateral = self.add_sublayer( + lateral_name, + Conv2D( + num_channels=in_c, + num_filters=out_channel, + filter_size=1, + param_attr=ParamAttr( + #name=lateral_name+'_w', + initializer=Xavier(fan_out=in_c)), + bias_attr=ParamAttr( + #name=lateral_name+'_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + self.lateral_convs.append(lateral) + + fpn_name = 'fpn_res{}_sum'.format(i + 2) + fpn_conv = self.add_sublayer( + fpn_name, + Conv2D( + num_channels=out_channel, + num_filters=out_channel, + filter_size=3, + padding=1, + param_attr=ParamAttr( + #name=fpn_name+'_w', + initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr( + #name=fpn_name+'_b', + learning_rate=2., + regularizer=L2Decay(0.)))) + self.fpn_convs.append(fpn_conv) + + self.min_level = min_level + self.max_level = max_level + self.spatial_scale = spatial_scale + + def forward(self, body_feats): + laterals = [] + for lvl in range(self.min_level, self.max_level): + laterals.append(self.lateral_convs[lvl](body_feats[lvl])) + + for lvl in range(self.max_level - 1, self.min_level, -1): + upsample = fluid.layers.resize_nearest(laterals[lvl], scale=2.) 
+ laterals[lvl - 1] = laterals[lvl - 1] + upsample + + fpn_output = [] + for lvl in range(self.min_level, self.max_level): + fpn_output.append(self.fpn_convs[lvl](laterals[lvl])) + + extension = fluid.layers.pool2d(fpn_output[-1], 1, 'max', pool_stride=2) + + spatial_scale = self.spatial_scale + [self.spatial_scale[-1] * 0.5] + fpn_output.append(extension) + return fpn_output, spatial_scale diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index d4c7663c7cfeea50dc0960d81f46bc34f67761a1..5d7a112939a9ddbde947f6aa78507b34edcc732a 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -14,21 +14,29 @@ class AnchorGeneratorRPN(object): anchor_sizes=[32, 64, 128, 256, 512], aspect_ratios=[0.5, 1.0, 2.0], stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]): + variance=[1.0, 1.0, 1.0, 1.0], + anchor_start_size=None): super(AnchorGeneratorRPN, self).__init__() self.anchor_sizes = anchor_sizes self.aspect_ratios = aspect_ratios self.stride = stride self.variance = variance - - def __call__(self, inputs): - outs = fluid.layers.anchor_generator( - input=inputs, - anchor_sizes=self.anchor_sizes, + self.anchor_start_size = anchor_start_size + + def __call__(self, input, level=None): + anchor_sizes = self.anchor_sizes if ( + level is None or self.anchor_start_size is None) else ( + self.anchor_start_size * 2**level) + stride = self.stride if ( + level is None or self.anchor_start_size is None) else ( + self.stride[0] * (2.**level), self.stride[1] * (2.**level)) + anchor, var = fluid.layers.anchor_generator( + input=input, + anchor_sizes=anchor_sizes, aspect_ratios=self.aspect_ratios, - stride=self.stride, + stride=stride, variance=self.variance) - return outs + return anchor, var @register @@ -49,20 +57,12 @@ class AnchorTargetGeneratorRPN(object): self.negative_overlap = negative_overlap self.use_random = use_random - def __call__(self, - cls_logits, - bbox_pred, - anchor_box, - gt_boxes, - is_crowd, - im_info, - open_debug=False): + def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd, + im_info): anchor_box = anchor_box.numpy() gt_boxes = gt_boxes.numpy() is_crowd = is_crowd.numpy() im_info = im_info.numpy() - if open_debug: - self.use_random = False loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, self.batch_size_per_im, self.positive_overlap, @@ -149,8 +149,7 @@ class ProposalGenerator(object): infer_post_nms_top_n=1000, nms_thresh=.5, min_size=.1, - eta=1., - return_rois_num=True): + eta=1.): super(ProposalGenerator, self).__init__() self.train_pre_nms_top_n = train_pre_nms_top_n self.train_post_nms_top_n = train_post_nms_top_n @@ -159,7 +158,6 @@ class ProposalGenerator(object): self.nms_thresh = nms_thresh self.min_size = min_size self.eta = eta - self.return_rois_num = return_rois_num def __call__(self, scores, @@ -170,7 +168,7 @@ class ProposalGenerator(object): mode='train'): pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n - outs = fluid.layers.generate_proposals( + rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals( scores, bbox_deltas, im_info, @@ -181,8 +179,8 @@ class ProposalGenerator(object): nms_thresh=self.nms_thresh, min_size=self.min_size, eta=self.eta, - return_rois_num=self.return_rois_num) - return outs + return_rois_num=True) + return rpn_rois, rpn_rois_prob, 
rpn_rois_num, post_nms_top_n @register @@ -210,34 +208,29 @@ class ProposalTargetGenerator(object): self.bbox_reg_weights = bbox_reg_weights self.num_classes = num_classes self.use_random = use_random - self.is_cls_agnostic = is_cls_agnostic, + self.is_cls_agnostic = is_cls_agnostic self.is_cascade_rcnn = is_cascade_rcnn def __call__(self, rpn_rois, - rpn_rois_nums, + rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info, - stage=0, - open_debug=False): + stage=0): rpn_rois = rpn_rois.numpy() - rpn_rois_nums = rpn_rois_nums.numpy() + rpn_rois_num = rpn_rois_num.numpy() gt_classes = gt_classes.numpy() gt_boxes = gt_boxes.numpy() is_crowd = is_crowd.numpy() im_info = im_info.numpy() - if open_debug: - self.use_random = False - outs = generate_proposal_target( - rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info, + rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info, self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], self.bbox_reg_weights[stage], self.num_classes, self.use_random, self.is_cls_agnostic, self.is_cascade_rcnn) - outs = [to_variable(v) for v in outs] for v in outs: v.stop_gradient = True @@ -247,25 +240,25 @@ class ProposalTargetGenerator(object): @register @serializable class MaskTargetGenerator(object): - __shared__ = ['num_classes'] + __shared__ = ['num_classes', 'mask_resolution'] - def __init__(self, num_classes=81, resolution=14): + def __init__(self, num_classes=81, mask_resolution=14): super(MaskTargetGenerator, self).__init__() self.num_classes = num_classes - self.resolution = resolution + self.mask_resolution = mask_resolution - def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_nums, + def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_num, labels_int32): im_info = im_info.numpy() gt_classes = gt_classes.numpy() is_crowd = is_crowd.numpy() gt_segms = gt_segms.numpy() rois = rois.numpy() - rois_nums = rois_nums.numpy() + rois_num = rois_num.numpy() labels_int32 = labels_int32.numpy() outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, - rois, rois_nums, labels_int32, - self.num_classes, self.resolution) + rois, rois_num, labels_int32, + self.num_classes, self.mask_resolution) outs = [to_variable(v) for v in outs] for v in outs: @@ -277,41 +270,54 @@ class MaskTargetGenerator(object): class RoIExtractor(object): def __init__(self, resolution=14, - spatial_scale=1. 
/ 16, sampling_ratio=0, - extractor_type='RoIAlign'): + canconical_level=4, + canonical_size=224, + start_level=0, + end_level=3): super(RoIExtractor, self).__init__() - if isinstance(resolution, Integral): - resolution = [resolution, resolution] self.resolution = resolution - self.spatial_scale = spatial_scale self.sampling_ratio = sampling_ratio - self.extractor_type = extractor_type + self.canconical_level = canconical_level + self.canonical_size = canonical_size + self.start_level = start_level + self.end_level = end_level - def __call__(self, feat, rois, rois_nums): + def __call__(self, feats, rois, spatial_scale): + roi, rois_num = rois cur_l = 0 - new_nums = [cur_l] - rois_nums_np = rois_nums.numpy() - for l in rois_nums_np: - cur_l += l - new_nums.append(cur_l) - nums_t = to_variable(np.asarray(new_nums)) - if self.extractor_type == 'RoIAlign': + if self.start_level == self.end_level: rois_feat = fluid.layers.roi_align( - feat, - rois, - self.resolution[0], - self.resolution[1], - self.spatial_scale, - rois_lod=nums_t) - elif self.extractor_type == 'RoIPool': - rois_feat = fluid.layers.roi_pool( - feat, - rois, - self.resolution[0], - self.resolution[1], - self.spatial_scale, - rois_lod=nums_t) + feats[self.start_level], + roi, + self.resolution, + self.resolution, + spatial_scale, + rois_num=rois_num) + return rois_feat + offset = 2 + k_min = self.start_level + offset + k_max = self.end_level + offset + rois_dist, restore_index, rois_num_dist = fluid.layers.distribute_fpn_proposals( + roi, + k_min, + k_max, + self.canconical_level, + self.canonical_size, + rois_num=rois_num) + rois_feat_list = [] + for lvl in range(self.start_level, self.end_level + 1): + roi_feat = fluid.layers.roi_align( + feats[lvl], + rois_dist[lvl], + self.resolution, + self.resolution, + spatial_scale[lvl], + sampling_ratio=self.sampling_ratio, + rois_num=rois_num_dist[lvl]) + rois_feat_list.append(roi_feat) + rois_feat_shuffle = fluid.layers.concat(rois_feat_list) + rois_feat = fluid.layers.gather(rois_feat_shuffle, restore_index) return rois_feat @@ -333,11 +339,13 @@ class DecodeClipNms(object): self.score_threshold = score_threshold self.nms_threshold = nms_threshold - def __call__(self, bbox, bbox_prob, bbox_delta, img_info): - outs = bbox_post_process(bbox.numpy(), + def __call__(self, bboxes, bbox_prob, bbox_delta, im_info): + bboxes_np = (i.numpy() for i in bboxes) + # bbox, bbox_num + outs = bbox_post_process(bboxes_np, bbox_prob.numpy(), bbox_delta.numpy(), - img_info.numpy(), self.keep_top_k, + im_info.numpy(), self.keep_top_k, self.score_threshold, self.nms_threshold, self.num_classes) outs = [to_variable(v) for v in outs] diff --git a/ppdet/py_op/bbox.py b/ppdet/py_op/bbox.py index 83b68a78222cf553e84ba52e52586d7b3aefd944..dc34a77cd2b3f09a76fb0ca1476458855ebcc301 100755 --- a/ppdet/py_op/bbox.py +++ b/ppdet/py_op/bbox.py @@ -126,12 +126,11 @@ def bbox_overlaps(bboxes1, bboxes2): def nms(dets, thresh): if dets.shape[0] == 0: return [] - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - scores = dets[:, 4] - + scores = dets[:, 0] + x1 = dets[:, 1] + y1 = dets[:, 2] + x2 = dets[:, 3] + y2 = dets[:, 4] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] @@ -242,13 +241,13 @@ def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights): np.float32, copy=False) -@jit +#@jit def expand_bbox_targets(bbox_targets_input, class_nums=81, is_cls_agnostic=False): class_labels = bbox_targets_input[:, 0] fg_inds = np.where(class_labels > 0)[0] - if not 
is_cls_agnostic: + if is_cls_agnostic: class_nums = 2 bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums)) bbox_inside_weights = np.zeros(bbox_targets.shape) diff --git a/ppdet/py_op/mask.py b/ppdet/py_op/mask.py index 07ff76a9a4e34d67b826d5254ce6fb11160083f8..9de446f85bed09d7b02e2eb0f2ce08c61626d468 100755 --- a/ppdet/py_op/mask.py +++ b/ppdet/py_op/mask.py @@ -180,7 +180,7 @@ def polys_to_mask_wrt_box(polygons, box, M): return mask -@jit +#@jit def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): """Expand masks from shape (#masks, resolution ** 2) to (#masks, #classes * resolution ** 2) to encode class diff --git a/ppdet/py_op/post_process.py b/ppdet/py_op/post_process.py index a2f972e660450aef558e251fb7e31c8d60c3c92b..c6bf354e47453fade8c9fc88dea42f92df90b478 100755 --- a/ppdet/py_op/post_process.py +++ b/ppdet/py_op/post_process.py @@ -3,44 +3,45 @@ import os import numpy as np from numba import jit from .bbox import delta2bbox, clip_bbox, expand_bbox, nms +import pycocotools.mask as mask_util +import cv2 def bbox_post_process(bboxes, - bbox_nums, - bbox_probs, + bbox_prob, bbox_deltas, im_info, keep_top_k=100, score_thresh=0.05, nms_thresh=0.5, class_nums=81, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]): - - new_bboxes = [[] for _ in range(len(bbox_nums))] - new_bbox_nums = [0] + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + with_background=True): + bbox, bbox_num = bboxes + new_bbox = [[] for _ in range(len(bbox_num))] + new_bbox_num = [] st_num = 0 end_num = 0 - for i in range(len(bbox_nums)): - bbox_num = bbox_nums[i] - end_num += bbox_num - - bbox = bboxes[st_num:end_num, :] # bbox - bbox = bbox / im_info[i][2] # scale - bbox_delta = bbox_deltas[st_num:end_num, :] # bbox delta - + for i in range(len(bbox_num)): + box_num = bbox_num[i] + end_num += box_num + + boxes = bbox[st_num:end_num, :] # bbox + boxes = boxes / im_info[i][2] # scale + bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta + bbox_delta = np.reshape(bbox_delta, (box_num, -1)) # step1: decode - bbox = delta2bbox(bbox_delta, bbox, bbox_reg_weights) + boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights) # step2: clip - bbox = clip_bbox(bbox, im_info[i][:2] / im_info[i][2]) - + boxes = clip_bbox(boxes, im_info[i][:2] / im_info[i][2]) # step3: nms cls_boxes = [[] for _ in range(class_nums)] - scores_n = bbox_probs[st_num:end_num, :] - for j in range(1, class_nums): + scores_n = bbox_prob[st_num:end_num, :] + for j in range(with_background, class_nums): inds = np.where(scores_n[:, j] > score_thresh)[0] scores_j = scores_n[inds, j] - rois_j = bbox[inds, j * 4:(j + 1) * 4] + rois_j = boxes[inds, j * 4:(j + 1) * 4] dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( np.float32, copy=False) keep = nms(dets_j, nms_thresh) @@ -51,32 +52,34 @@ def bbox_post_process(bboxes, np.float32, copy=False) cls_boxes[j] = nms_dets - st_num += bbox_num + st_num += box_num # Limit to max_per_image detections **over all classes** image_scores = np.hstack( - [cls_boxes[j][:, 1] for j in range(1, class_nums)]) + [cls_boxes[j][:, 1] for j in range(with_background, class_nums)]) if len(image_scores) > keep_top_k: image_thresh = np.sort(image_scores)[-keep_top_k] - for j in range(1, class_nums): + for j in range(with_background, class_nums): keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] cls_boxes[j] = cls_boxes[j][keep, :] - new_bboxes_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) - new_bboxes[i] = new_bboxes_n - new_bbox_nums.append(len(new_bboxes_n)) - labels = 
new_bboxes_n[:, 0] - scores = new_bboxes_n[:, 1] - boxes = new_bboxes_n[:, 2:] - new_bboxes = np.vstack([new_bboxes[k] for k in range(len(bbox_nums) - 1)]) - new_bbox_nums = np.array(new_bbox_nums) - return new_bbox_nums, new_bboxes + new_bbox_n = np.vstack( + [cls_boxes[j] for j in range(with_background, class_nums)]) + new_bbox[i] = new_bbox_n + new_bbox_num.append(len(new_bbox_n)) + new_bbox = np.vstack([new_bbox[k] for k in range(len(bbox_num))]) + new_bbox_num = np.array(new_bbox_num).astype('int32') + return new_bbox, new_bbox_num @jit -def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): - scale = (resolution + 2.0) / resolution - boxes = bboxes[:, 2:] - labels = bboxes[:, 0] +def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5): + if masks.shape[0] == 0: + return masks + bbox, bbox_nums = bboxes + M = resolution + scale = (M + 2.0) / M + boxes = bbox[:, 2:] + labels = bbox[:, 0] segms_results = [[] for _ in range(len(bbox_nums))] sum = 0 st_num = 0 @@ -92,7 +95,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): im_h = int(round(im_info[i][0] / im_info[i][2])) im_w = int(round(im_info[i][1] / im_info[i][2])) - boxes_n = expand_boxes(boxes_n, scale) + boxes_n = expand_bbox(boxes_n, scale) boxes_n = boxes_n.astype(np.int32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) for j in range(len(boxes_n)): @@ -106,7 +109,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): h = np.maximum(h, 1) mask = cv2.resize(padded_mask, (w, h)) - mask = np.array(mask > cfg.mrcnn_thresh_binarize, dtype=np.uint8) + mask = np.array(mask > binary_thresh, dtype=np.uint8) im_mask = np.zeros((im_h, im_w), dtype=np.uint8) x_0 = max(ref_box[0], 0) @@ -121,20 +124,18 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): im_mask[:, :, np.newaxis], order='F'))[0] cls_segms.append(rle) segms_results[i] = np.array(cls_segms)[:, np.newaxis] + st_num += bbox_num segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums))]) - bboxes = np.hstack([segms_results, bboxes]) + bboxes = np.hstack([segms_results, bbox]) return bboxes[:, :3] @jit -def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map, - batch_size=1): +def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map): det_res = [] k = 0 for i in range(len(bbox_nums)): image_id = int(image_id[i][0]) - image_width = int(image_shape[i][1]) - image_height = int(image_shape[i][2]) det_nums = bbox_nums[i] for j in range(det_nums): diff --git a/ppdet/py_op/target.py b/ppdet/py_op/target.py index fb949ea39b8a29341977f055a8c77ce1be385a68..0a36a3fdc567c1207de6f16ca94bcda75160d07a 100755 --- a/ppdet/py_op/target.py +++ b/ppdet/py_op/target.py @@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors, @jit def label_anchor(anchors, gt_boxes): - iou = compute_iou(anchors, gt_boxes) + iou = bbox_overlaps(anchors, gt_boxes) # every gt's anchor's index gt_bbox_anchor_inds = iou.argmax(axis=0) @@ -150,7 +150,7 @@ def sample_anchor(anchor_gt_bbox_iou, @jit def generate_proposal_target(rpn_rois, - rpn_rois_nums, + rpn_rois_num, gt_classes, is_crowd, gt_boxes, @@ -171,12 +171,12 @@ def generate_proposal_target(rpn_rois, tgt_deltas = [] rois_inside_weights = [] rois_outside_weights = [] - rois_nums = [] + new_rois_num = [] st_num = 0 end_num = 0 - for im_i in range(len(rpn_rois_nums)): - rpn_rois_num = rpn_rois_nums[im_i] - end_num += rpn_rois_num + for im_i in range(len(rpn_rois_num)): + length = rpn_rois_num[im_i] + end_num += 
length rpn_roi = rpn_rois[st_num:end_num] im_scale = im_info[im_i][2] @@ -220,10 +220,10 @@ def generate_proposal_target(rpn_rois, bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) roi = sampled_boxes * im_scale - st_num += rpn_rois_num + st_num += length rois.append(roi) - rois_nums.append(roi.shape[0]) + new_rois_num.append(roi.shape[0]) tgt_labels.append(sampled_labels) tgt_deltas.append(sampled_deltas) rois_inside_weights.append(bbox_inside_weights) @@ -237,9 +237,8 @@ def generate_proposal_target(rpn_rois, rois_inside_weights, axis=0).astype(np.float32) rois_outside_weights = np.concatenate( rois_outside_weights, axis=0).astype(np.float32) - rois_nums = np.asarray(rois_nums, np.int32) - - return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, rois_nums + new_rois_num = np.asarray(new_rois_num, np.int32) + return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num @jit @@ -250,7 +249,7 @@ def label_bbox(boxes, class_nums=81, is_cascade_rcnn=False): - iou = compute_iou(boxes, gt_boxes) + iou = bbox_overlaps(boxes, gt_boxes) # every roi's gt box's index roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) @@ -318,15 +317,16 @@ def sample_bbox(roi_gt_bbox_iou, @jit def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois, - rois_nums, labels_int32, num_classes, resolution): + rois_num, labels_int32, num_classes, resolution): mask_rois = [] + mask_rois_num = [] rois_has_mask_int32 = [] mask_int32 = [] st_num = 0 end_num = 0 - for k in range(len(rois_nums)): - rois_num = rois_nums[k] - end_num += rois_num + for k in range(len(rois_num)): + length = rois_num[k] + end_num += length # remove padding gt_polys = gt_segms[k] @@ -345,37 +345,32 @@ def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois, if len(new_poly) > 0: gt_segs.append(new_poly) new_gt_polys.append(gt_segs) - im_scale = im_info[k][2] boxes = rois[st_num:end_num] / im_scale bbox_fg, bbox_has_mask, masks = sample_mask( - boxes, new_gt_polys, labels_int32[st_num:rois_num], gt_classes[k], + boxes, new_gt_polys, labels_int32[st_num:end_num], gt_classes[k], is_crowd[k], num_classes, resolution) - st_num += rois_num + st_num += length mask_rois.append(bbox_fg * im_scale) + mask_rois_num.append(len(bbox_fg)) rois_has_mask_int32.append(bbox_has_mask) mask_int32.append(masks) mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32) + mask_rois_num = np.array(mask_rois_num).astype(np.int32) rois_has_mask_int32 = np.concatenate( rois_has_mask_int32, axis=0).astype(np.int32) mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32) - return mask_rois, rois_has_mask_int32, mask_int32 + return mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32 @jit -def sample_mask( - boxes, - gt_polys, - label_int32, - gt_classes, - is_crowd, - num_classes, - resolution, ): +def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes, + resolution): gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0] _gt_polys = [gt_polys[i] for i in gt_polys_inds] @@ -405,7 +400,5 @@ def sample_mask( masks_fg = -np.ones((1, resolution**2), dtype=np.int32) labels_fg = np.zeros((1, )) bbox_has_mask = np.append(bbox_has_mask, 0) - masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes) - return bbox_fg, bbox_has_mask, masks diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py index 3ee6c328a1024b109d6cc77b46ac28cd52082538..c2c41947f4cf1c76eeea4a72f5e9178e129058e9 100644 --- 
a/ppdet/utils/checkpoint.py +++ b/ppdet/utils/checkpoint.py @@ -45,37 +45,46 @@ def get_ckpt_path(path): def load_dygraph_ckpt(model, - optimizer, + optimizer=None, pretrain_ckpt=None, ckpt=None, - ckpt_type='pretrain', + ckpt_type=None, exclude_params=[], - open_debug=False): + load_static_weights=False): - if ckpt_type == 'pretrain': + assert ckpt_type in ['pretrain', 'resume', 'finetune', None] + if ckpt_type == 'pretrain' and ckpt is None: ckpt = pretrain_ckpt ckpt = get_ckpt_path(ckpt) - if ckpt is not None and os.path.exists(ckpt): - param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt) - if open_debug: - print("Loading Weights: ", param_state_dict.keys()) + assert os.path.exists(ckpt), "Path {} does not exist.".format(ckpt) + if load_static_weights: + pre_state_dict = fluid.load_program_state(ckpt) + param_state_dict = {} + model_dict = model.state_dict() + for key in model_dict.keys(): + weight_name = model_dict[key].name + if weight_name in pre_state_dict.keys(): + print('Load weight: {}, shape: {}'.format( + weight_name, pre_state_dict[weight_name].shape)) + param_state_dict[key] = pre_state_dict[weight_name] + else: + param_state_dict[key] = model_dict[key] + model.set_dict(param_state_dict) + return model + param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt) - if len(exclude_params) != 0: - for k in exclude_params: - param_state_dict.pop(k, None) + if len(exclude_params) != 0: + for k in exclude_params: + param_state_dict.pop(k, None) - if ckpt_type == 'pretrain': - model.backbone.set_dict(param_state_dict) - elif ckpt_type == 'finetune': - model.set_dict(param_state_dict, use_structured_name=True) - else: - model.set_dict(param_state_dict) + if ckpt_type == 'pretrain': + model.backbone.set_dict(param_state_dict) + else: + model.set_dict(param_state_dict) - if ckpt_type == 'resume': - if optim_state_dict is None: - print("Can't Resume Last Training's Optimizer State!!!") - else: - optimizer.set_dict(optim_state_dict) + if ckpt_type == 'resume': + assert optim_state_dict, "Can't Resume Last Training's Optimizer State!!!" 
+ optimizer.set_dict(optim_state_dict) return model diff --git a/ppdet/utils/eval_utils.py b/ppdet/utils/eval_utils.py index 4dfddbf7f98f8085677a3c4129714d1763e67ac1..b0bb21a151006134d00d57f4affb0ef44733b681 100644 --- a/ppdet/utils/eval_utils.py +++ b/ppdet/utils/eval_utils.py @@ -28,10 +28,7 @@ def json_eval_results(metric, json_directory=None, dataset=None): logger.info("{} not exists!".format(v_json)) -def coco_eval_results(outs_res=None, - include_mask=False, - batch_size=1, - dataset=None): +def coco_eval_results(outs_res=None, include_mask=False, dataset=None): print("start evaluate bbox using coco api") import io import six @@ -49,14 +46,14 @@ def coco_eval_results(outs_res=None, if outs_res is not None and len(outs_res) > 0: det_res = [] for outs in outs_res: - det_res += get_det_res(outs['bbox_nums'], outs['bbox'], - outs['im_id'], catid, batch_size) + det_res += get_det_res(outs['bbox'], outs['bbox_num'], + outs['im_id'], catid) - with io.open("bbox_eval.json", 'w') as outfile: + with io.open("bbox.json", 'w') as outfile: encode_func = unicode if six.PY2 else str outfile.write(encode_func(json.dumps(det_res))) - cocoDt = cocoGt.loadRes("bbox_eval.json") + cocoDt = cocoGt.loadRes("bbox.json") cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.evaluate() cocoEval.accumulate() @@ -65,14 +62,15 @@ def coco_eval_results(outs_res=None, if outs_res is not None and len(outs_res) > 0 and include_mask: seg_res = [] for outs in outs_res: - seg_res += get_seg_res(outs['bbox_nums'], outs['mask'], - outs['im_id'], catid, batch_size) + seg_res += get_seg_res(outs['mask'], outs['bbox_num'], + outs['im_id'], catid) - with io.open("mask_eval.json", 'w') as outfile: + with io.open("mask.json", 'w') as outfile: encode_func = unicode if six.PY2 else str outfile.write(encode_func(json.dumps(seg_res))) - cocoSg = cocoGt.loadRes("mask_eval.json") - cocoEval = COCOeval(cocoGt, cocoSg, 'bbox') + cocoSg = cocoGt.loadRes("mask.json") + cocoEval = COCOeval(cocoGt, cocoSg, 'segm') cocoEval.evaluate() cocoEval.accumulate() + cocoEval.summarize() diff --git a/ppdet/utils/stats.py b/ppdet/utils/stats.py index 4d7e36babf8d53170162cfd5581f591e376ec8cd..e34beb26d3b0964e25f15a3ac8307c0c5ea9a601 100644 --- a/ppdet/utils/stats.py +++ b/ppdet/utils/stats.py @@ -47,7 +47,7 @@ class TrainingStats(object): def update(self, stats): for k, v in self.smoothed_losses_and_metrics.items(): - v.add_value(stats[k]) + v.add_value(stats[k].numpy()) def get(self, extras=None): stats = collections.OrderedDict() diff --git a/tools/eval.py b/tools/eval.py index 6300710f4a087fb06e29f2d41ce1754c9d3a3548..d437cd7cb5b3d3e466f66e3846a9b171ed3d1181 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -1,7 +1,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os +import os, sys +# add python path of PadleDetection to sys.path +parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) +if parent_path not in sys.path: + sys.path.append(parent_path) + import time # ignore numba warning import warnings @@ -14,6 +19,7 @@ from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.cli import ArgsParser from ppdet.utils.eval_utils import coco_eval_results from ppdet.data.reader import create_reader +from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt def parse_args(): @@ -38,11 +44,10 @@ def run(FLAGS, cfg): # Model main_arch = cfg.architecture - model = create(cfg.architecture, mode='infer', open_debug=cfg.open_debug) + 
model = create(cfg.architecture) # Init Model - param_state_dict = fluid.dygraph.load_dygraph(cfg.weights)[0] - model.set_dict(param_state_dict) + model = load_dygraph_ckpt(model, ckpt=cfg.weights) # Data Reader if FLAGS.use_gpu: @@ -58,7 +63,7 @@ def run(FLAGS, cfg): # forward model.eval() - outs = model(data, cfg['EvalReader']['inputs_def']['fields']) + outs = model(data, cfg['EvalReader']['inputs_def']['fields'], 'infer') outs_res.append(outs) # log @@ -68,7 +73,7 @@ def run(FLAGS, cfg): # Metric coco_eval_results( outs_res, - include_mask=True if 'MaskHed' in cfg else False, + include_mask=True if 'MaskHead' in cfg else False, dataset=cfg['EvalReader']['dataset']) diff --git a/tools/train.py b/tools/train.py index 5c4c87f4fd9bdd9350152ad78c2bf26684c86f4f..27f13323be3ab7d4f8b82a6d01f71a2a0fcc9ade 100755 --- a/tools/train.py +++ b/tools/train.py @@ -1,19 +1,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os +import os, sys +# add python path of PadleDetection to sys.path +parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) +if parent_path not in sys.path: + sys.path.append(parent_path) + import time # ignore numba warning import warnings warnings.filterwarnings('ignore') import random +import datetime import numpy as np +from collections import deque import paddle.fluid as fluid from ppdet.core.workspace import load_config, merge_config, create from ppdet.data.reader import create_reader +from ppdet.utils.stats import TrainingStats from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.cli import ArgsParser from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt +from paddle.fluid.dygraph.parallel import ParallelEnv +import logging +FORMAT = '%(asctime)s-%(levelname)s: %(message)s' +logging.basicConfig(level=logging.INFO, format=FORMAT) +logger = logging.getLogger(__name__) def parse_args(): @@ -24,7 +37,6 @@ def parse_args(): type=str, help="Loading Checkpoints only support 'pretrain', 'finetune', 'resume'." 
) - parser.add_argument( "--fp16", action='store_true', @@ -63,11 +75,6 @@ def parse_args(): "This flag is only used for internal test.") parser.add_argument( "--use_gpu", action='store_true', default=False, help="data parallel") - parser.add_argument( - "--use_parallel", - action='store_true', - default=False, - help="data parallel") parser.add_argument( '--is_profiler', @@ -88,13 +95,13 @@ def run(FLAGS, cfg): random.seed(local_seed) np.random.seed(local_seed) - if FLAGS.enable_ce or cfg.open_debug: + if FLAGS.enable_ce: random.seed(0) np.random.seed(0) # Model main_arch = cfg.architecture - model = create(cfg.architecture, mode='train', open_debug=cfg.open_debug) + model = create(cfg.architecture) # Optimizer lr = create('LearningRate')() @@ -105,12 +112,11 @@ def run(FLAGS, cfg): model, optimizer, cfg.pretrain_weights, - cfg.weights, - FLAGS.ckpt_type, - open_debug=cfg.open_debug) + ckpt_type=FLAGS.ckpt_type, + load_static_weights=cfg.load_static_weights) # Parallel Model - if FLAGS.use_parallel: + if ParallelEnv().nranks > 1: strategy = fluid.dygraph.parallel.prepare_context() model = fluid.dygraph.parallel.DataParallel(model, strategy) @@ -122,21 +128,29 @@ def run(FLAGS, cfg): devices_num = int(os.environ.get('CPU_NUM', 1)) train_reader = create_reader( - cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, - cfg, - devices_num=devices_num) + cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=1) + time_stat = deque(maxlen=cfg.log_smooth_window) + start_time = time.time() + end_time = time.time() # Run Train for iter_id, data in enumerate(train_reader()): - start_time = time.time() + + start_time = end_time + end_time = time.time() + time_stat.append(end_time - start_time) + time_cost = np.mean(time_stat) + eta_sec = (cfg.max_iters - iter_id) * time_cost + eta = str(datetime.timedelta(seconds=int(eta_sec))) # Model Forward model.train() - outputs = model(data, cfg['TrainReader']['inputs_def']['fields']) + outputs = model(data, cfg['TrainReader']['inputs_def']['fields'], + 'train') # Model Backward loss = outputs['loss'] - if FLAGS.use_parallel: + if ParallelEnv().nranks > 1: loss = model.scale_loss(loss) loss.backward() model.apply_collective_grads() @@ -144,30 +158,27 @@ def run(FLAGS, cfg): loss.backward() optimizer.minimize(loss) model.clear_gradients() - - # Log state - cost_time = time.time() - start_time - # TODO: check this method curr_lr = optimizer.current_step_lr() - log_info = "iter: {}, time: {:.4f}, lr: {:.6f}".format( - iter_id, cost_time, curr_lr) - for k, v in outputs.items(): - log_info += ", {}: {:.6f}".format(k, v.numpy()[0]) - print(log_info) - - # Debug - if cfg.open_debug and iter_id > 10: - break - - # Save Stage - if iter_id > 0 and iter_id % int( - cfg.snapshot_iter) == 0 and fluid.dygraph.parallel.Env( - ).local_rank == 0: - cfg_name = os.path.basename(FLAGS.config).split('.')[0] - save_name = str( - iter_id) if iter_id != cfg.max_iters - 1 else "model_final" - save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) - save_dygraph_ckpt(model, optimizer, save_dir) + + if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0: + # Log state + if iter_id == 0: + train_stats = TrainingStats(cfg.log_smooth_window, + outputs.keys()) + train_stats.update(outputs) + logs = train_stats.log() + if iter_id % cfg.log_iter == 0: + strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( + iter_id, curr_lr, logs, time_cost, eta) + logger.info(strs) + # Save Stage + if iter_id > 0 and iter_id % int( + cfg.snapshot_iter) == 0 or iter_id == 
cfg.max_iters - 1: + cfg_name = os.path.basename(FLAGS.config).split('.')[0] + save_name = str( + iter_id) if iter_id != cfg.max_iters - 1 else "model_final" + save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) + save_dygraph_ckpt(model, optimizer, save_dir) def main(): @@ -179,7 +190,7 @@ def main(): check_gpu(cfg.use_gpu) check_version() - place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ + place = fluid.CUDAPlace(ParallelEnv().dev_id) \ if cfg.use_gpu else fluid.CPUPlace() with fluid.dygraph.guard(place):
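Note on the multi-level RoI extraction introduced above: the rewritten RoIExtractor calls fluid.layers.distribute_fpn_proposals to route every proposal to exactly one FPN level (start_level/end_level 0-3 plus the offset of 2, i.e. levels 2-5) before running roi_align per level and gathering the results back with restore_index. The sketch below is an illustration only, not code from this patch: it mimics the usual FPN-paper level-assignment heuristic in plain NumPy with the patch defaults (canconical_level=4, canonical_size=224); the function name assign_fpn_level is invented for the example.

import numpy as np

# Illustrative sketch only -- approximates the per-RoI level routing that
# fluid.layers.distribute_fpn_proposals performs inside the new RoIExtractor,
# assuming the standard FPN heuristic: level = floor(k0 + log2(sqrt(w*h)/224)).
def assign_fpn_level(rois, canonical_level=4, canonical_size=224,
                     min_level=2, max_level=5):
    # rois: (N, 4) array of [x1, y1, x2, y2] in input-image coordinates
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    scale = np.sqrt(np.clip(w * h, 1e-6, None))
    target = np.floor(canonical_level + np.log2(scale / canonical_size + 1e-6))
    return np.clip(target, min_level, max_level).astype(np.int32)

rois = np.array([[0., 0., 32., 32.],     # small box  -> high-resolution level
                 [0., 0., 224., 224.],   # canonical  -> level 4
                 [0., 0., 800., 800.]])  # large box  -> coarsest level
print(assign_fpn_level(rois))  # -> [2 4 5]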
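A similar illustration for the reworked MaskPostProcess / mask_post_process path: after bbox post-processing, each detection carries a mask_resolution x mask_resolution probability map that is resized into its box on the original image and binarized at binary_thresh. The single-box sketch below is a simplified stand-in, not patch code (it skips the (M+2)/M box expansion and the RLE encoding the patch performs); paste_mask and its arguments are made up for the example.

import numpy as np
import cv2

# Simplified, single-detection version of the resize-and-threshold paste done
# by mask_post_process; not part of the patch.
def paste_mask(prob_map, box, im_h, im_w, binary_thresh=0.5):
    # prob_map: (M, M) float32 sigmoid outputs for one detection
    # box: [x1, y1, x2, y2] in original-image coordinates
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    w = max(x2 - x1 + 1, 1)
    h = max(y2 - y1 + 1, 1)
    mask = cv2.resize(prob_map, (w, h))                # stretch M x M map to the box
    mask = (mask > binary_thresh).astype(np.uint8)     # binarize
    im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
    x_lo, y_lo = max(x1, 0), max(y1, 0)
    x_hi, y_hi = min(x2 + 1, im_w), min(y2 + 1, im_h)
    im_mask[y_lo:y_hi, x_lo:x_hi] = mask[(y_lo - y1):(y_hi - y1),
                                         (x_lo - x1):(x_hi - x1)]
    return im_mask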