diff --git a/configs/solov2/README.md b/configs/solov2/README.md
index 0dd5fd8eccbadd0d79c777e04193bcf28da104fe..72f9bfe5055a64150fdc54efa48e83a77ad0cd00 100644
--- a/configs/solov2/README.md
+++ b/configs/solov2/README.md
@@ -1,22 +1,29 @@
-# SOLOv2 (Segmenting Objects by Locations) for instance segmentation
+# SOLOv2 for instance segmentation

 ## Introduction

-- SOLOv2 is a fast instance segmentation framework with strong performance: [https://arxiv.org/abs/2003.10152](https://arxiv.org/abs/2003.10152)
+- SOLOv2 (Segmenting Objects by Locations) is a fast instance segmentation framework with strong performance. We reproduced the model of the paper and further improved its accuracy and speed. Among these models, `Light-R50-VD-DCN-FPN` reaches 38.6 FPS on a single Tesla V100 and 38.8 mask AP on COCO val2017.

-```
-@misc{wang2020solov2,
-  title={SOLOv2: Dynamic, Faster and Stronger},
-  author={Xinlong Wang and Rufeng Zhang and Tao Kong and Lei Li and Chunhua Shen},
-  year={2020},
-  eprint={2003.10152},
-  archivePrefix={arXiv},
-  primaryClass={cs.CV}
-}
-```

 ## Model Zoo

-| Backbone | Multi-scale training | Lr schd | Inf time (fps) | Mask AP | Download | Configs |
+| Backbone | Multi-scale training | Lr schd | Inf time (V100) | Mask AP | Download | Configs |
+| :---------------------: | :-------------------: | :-----: | :------------: | :-----: | :---------: | :------------------------: |
+| R50-FPN | False | 1x | 45.7ms | 35.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/solov2_r50_fpn_1x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/solov2/solov2_r50_fpn_1x.yml) |
+| R50-FPN | True | 3x | 45.7ms | 37.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/solov2_r50_fpn_3x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/solov2/solov2_r50_fpn_3x.yml) |
+| R101-VD-FPN | True | 3x | - | 42.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/solov2_r101_vd_fpn_3x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/solov2/solov2_r101_vd_fpn_3x.yml) |
+
+## Enhanced model
+| Backbone | Input size | Lr schd | Inf time (V100) | Mask AP | Download | Configs |
 | :---------------------: | :-------------------: | :-----: | :------------: | :-----: | :---------: | :------------------------: |
-| R50-FPN | False | 1x | - | 34.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/solov2_r50_fpn_1x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/solov2/solov2_r50_fpn_1x.yml) |
+| Light-R50-VD-DCN-FPN | 512 | 3x | 25.9ms | 38.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/solov2_light_r50_vd_fpn_dcn_512_3x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/solov2/solov2_light_r50_vd_fpn_dcn_512_3x.yml) |
+
+## Citations
+```
+@article{wang2020solov2,
+  title={SOLOv2: Dynamic, Faster and Stronger},
+  author={Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua},
+  journal={arXiv preprint arXiv:2003.10152},
+  year={2020}
+}
+```
diff --git a/configs/solov2/solov2_light_r50_vd_fpn_dcn_512_3x.yml b/configs/solov2/solov2_light_r50_vd_fpn_dcn_512_3x.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9f7fa4f918579ef4137906ba872bc442a81d2f2f
--- /dev/null
+++ b/configs/solov2/solov2_light_r50_vd_fpn_dcn_512_3x.yml
@@ -0,0 +1,68 @@
+architecture: SOLOv2
+use_gpu: true
+max_iters: 270000
+snapshot_iter: 30000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar
+metric: COCO
+weights: output/solov2_light_r50_vd_fpn_dcn_512_multiscale_3x/model_final
+num_classes: 81
+use_ema: true
+ema_decay: 0.9998
+
+SOLOv2:
+  backbone: ResNet
+  fpn: FPN
+  bbox_head: SOLOv2Head
+  mask_head: SOLOv2MaskHead
+
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+  dcn_v2_stages: [3, 4, 5]
+  variant: d
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+  reverse_out: True
+
+SOLOv2Head:
+  seg_feat_channels: 256
+  stacked_convs: 3
+  num_grids: [40, 36, 24, 16, 12]
+  kernel_out_channels: 128
+  dcn_v2_stages: [2,]
+  drop_block: True
+
+SOLOv2MaskHead:
+  out_channels: 128
+  start_level: 0
+  end_level: 3
+  num_classes: 128
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [180000, 240000]
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+_READER_: 'solov2_light_reader.yml'
diff --git a/configs/solov2/solov2_light_reader.yml b/configs/solov2/solov2_light_reader.yml
new file mode 100644
index 0000000000000000000000000000000000000000..415b92157becfb52de24120c22ec494484191f82
--- /dev/null
+++ b/configs/solov2/solov2_light_reader.yml
@@ -0,0 +1,102 @@
+TrainReader:
+  batch_size: 2
+  worker_num: 2
+  inputs_def:
+    fields: ['image', 'im_id', 'gt_segm']
+  dataset:
+    !COCODataSet
+    dataset_dir: dataset/coco
+    anno_path: annotations/instances_train2017.json
+    image_dir: train2017
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !Poly2Mask {}
+  - !ColorDistort {}
+  - !RandomCrop
+    is_mask_crop: True
+  - !ResizeImage
+    target_size: [352, 384, 416, 448, 480, 512]
+    max_size: 852
+    interp: 1
+    use_cv2: true
+    resize_box: true
+  - !RandomFlipImage
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  - !Gt2Solov2Target
+    num_grids: [40, 36, 24, 16, 12]
+    scale_ranges: [[1, 64], [32, 128], [64, 256], [128, 512], [256, 2048]]
+    coord_sigma: 0.2
+  shuffle: True
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id']
+  dataset:
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !ResizeImage
+    interp: 1
+    max_size: 852
+    target_size: 512
+    use_cv2: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+    use_padded_im_info: false
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+  drop_empty: false
+  worker_num: 2
+
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+  dataset:
+    !ImageFolder
+    anno_path: dataset/coco/annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !ResizeImage
+    interp: 1
+    max_size: 852
+    target_size: 512
+    use_cv2: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+    use_padded_im_info: false
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 249d11c2e653425bcaaf0dd1196db6cc90db03b2..caa47a220dc3861608701b78de7372114a827775 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -565,6 +565,7 @@ class Detector():
                     1]).copy_to_cpu()
                 np_segms = self.predictor.get_output_tensor(output_names[
                     2]).copy_to_cpu()
+                continue
             boxes_tensor = self.predictor.get_output_tensor(output_names[0])
             np_boxes = boxes_tensor.copy_to_cpu()
             if self.config.mask_resolution is not None:
diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py
index 31b2c10a49ca69a377aa373161ed673d12156f2b..c0e9bd6e0e4af3940326d0a99264fda745292ba5 100644
--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -631,6 +631,7 @@ class Gt2Solov2Target(BaseOperator):
         return resized_img

     def __call__(self, samples, context=None):
+        sample_id = 0
         for sample in samples:
             gt_bboxes_raw = sample['gt_bbox']
             gt_labels_raw = sample['gt_class']
@@ -642,7 +643,6 @@ class Gt2Solov2Target(BaseOperator):
             gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                                (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
             ins_ind_label_list = []
-            grid_offset = []
             idx = 0
             for (lower_bound, upper_bound), num_grid \
                     in zip(self.scale_ranges, self.num_grids):
@@ -663,8 +663,8 @@ class Gt2Solov2Target(BaseOperator):
                     ins_ind_label_list.append(ins_ind_label)
                     sample['cate_label{}'.format(idx)] = cate_label.flatten()
                     sample['ins_label{}'.format(idx)] = ins_label
-                    sample['grid_order{}'.format(idx)] = np.asarray([0])
-                    grid_offset.append(1)
+                    sample['grid_order{}'.format(idx)] = np.asarray(
+                        [sample_id * num_grid * num_grid + 0])
                     idx += 1
                     continue
                 gt_bboxes = gt_bboxes_raw[hit_indices]
@@ -722,7 +722,8 @@ class Gt2Solov2Target(BaseOperator):
                             1]] = seg_mask
                         ins_label.append(cur_ins_label)
                         ins_ind_label[label] = True
-                        grid_order.append(label)
+                        grid_order.append(
+                            [sample_id * num_grid * num_grid + label])
                 if ins_label == []:
                     ins_label = np.zeros(
                         [1, mask_feat_size[0], mask_feat_size[1]],
@@ -730,8 +731,8 @@ class Gt2Solov2Target(BaseOperator):
                     ins_ind_label_list.append(ins_ind_label)
                     sample['cate_label{}'.format(idx)] = cate_label.flatten()
                     sample['ins_label{}'.format(idx)] = ins_label
-                    sample['grid_order{}'.format(idx)] = np.asarray([0])
-                    grid_offset.append(1)
+                    sample['grid_order{}'.format(idx)] = np.asarray(
+                        [sample_id * num_grid * num_grid + 0])
                 else:
                     ins_label = np.stack(ins_label, axis=0)
                     ins_ind_label_list.append(ins_ind_label)
@@ -739,7 +740,6 @@ class Gt2Solov2Target(BaseOperator):
                     sample['ins_label{}'.format(idx)] = ins_label
                     sample['grid_order{}'.format(idx)] = np.asarray(grid_order)
                     assert len(grid_order) > 0
-                    grid_offset.append(len(grid_order))
                 idx += 1
             ins_ind_labels = np.concatenate([
                 ins_ind_labels_level_img
@@ -747,6 +747,6 @@ class Gt2Solov2Target(BaseOperator):
             ])
             fg_num = np.sum(ins_ind_labels)
             sample['fg_num'] = fg_num
-            sample['grid_offset'] = np.asarray(grid_offset).astype(np.int32)
+            sample_id += 1

         return samples
diff --git a/ppdet/modeling/anchor_heads/solov2_head.py b/ppdet/modeling/anchor_heads/solov2_head.py
index 4005f2c9e9f59664cd6e8456021754788835dc76..f243af5733b3d1c2672f9cc8908af73bc90a6f5f 100644
--- a/ppdet/modeling/anchor_heads/solov2_head.py
+++ b/ppdet/modeling/anchor_heads/solov2_head.py
@@ -21,7 +21,7 @@ from paddle import fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.regularizer import L2Decay
-from ppdet.modeling.ops import ConvNorm, DeformConvNorm, MaskMatrixNMS
+from ppdet.modeling.ops import ConvNorm, DeformConvNorm, MaskMatrixNMS, DropBlock
 from ppdet.core.workspace import register
 from ppdet.utils.check import check_version
@@ -53,6 +53,7 @@ class SOLOv2Head(object):
         pre_nms_top_n (int): Number of total instance to be kept per image before NMS
         post_nms_top_n (int): Number of total instance to be kept per image after NMS.
         mask_nms (object): MaskMatrixNMS instance.
+        drop_block (bool): Whether to use DropBlock or not.
     """
     __inject__ = []
     __shared__ = ['num_classes']
@@ -74,7 +75,8 @@ class SOLOv2Head(object):
                  pre_nms_top_n=500,
                  post_nms_top_n=100,
                  mask_nms=MaskMatrixNMS(
-                     kernel='gaussian', sigma=2.0).__dict__):
+                     kernel='gaussian', sigma=2.0).__dict__,
+                 drop_block=False):
         check_version('2.0.0')
         self.num_classes = num_classes
         self.seg_num_grids = num_grids
@@ -93,11 +95,12 @@ class SOLOv2Head(object):
         self.update_threshold = update_threshold
         self.pre_nms_top_n = pre_nms_top_n
         self.post_nms_top_n = post_nms_top_n
+        self.drop_block = drop_block
         self.conv_type = [ConvNorm, DeformConvNorm]
         if isinstance(mask_nms, dict):
             self.mask_nms = MaskMatrixNMS(**mask_nms)

-    def _conv_pred(self, conv_feat, num_filters, name, name_feat=None):
+    def _conv_pred(self, conv_feat, num_filters, is_test, name, name_feat=None):
         for i in range(self.stacked_convs):
             if i in self.dcn_v2_stages:
                 conv_func = self.conv_type[1]
@@ -122,6 +125,11 @@ class SOLOv2Head(object):
                 initializer=fluid.initializer.Constant(value=bias_init))
         else:
             bias_attr = ParamAttr(name="{}.bias".format(name_feat))
+
+        if self.drop_block:
+            conv_feat = DropBlock(
+                conv_feat, block_size=3, keep_prob=0.9, is_test=is_test)
+
         conv_feat = fluid.layers.conv2d(
             input=conv_feat,
             num_filters=num_filters,
@@ -167,14 +175,13 @@ class SOLOv2Head(object):
                     align_corners=False,
                     align_mode=0))

-    def get_outputs(self, input, is_eval=False, batch_size=1):
+    def get_outputs(self, input, is_eval=False):
         """
         Get SOLOv2 head output

         Args:
             input (list): List of Variables, output of backbone or neck stages
             is_eval (bool): whether in train or test mode
-            batch_size (int): batch size
         Returns:
             cate_pred_list (list): Variables of each category branch layer
             kernel_pred_list (list): Variables of each kernel branch layer
@@ -184,13 +191,13 @@ class SOLOv2Head(object):
         kernel_pred_list = []
         for idx in range(len(self.seg_num_grids)):
             cate_pred, kernel_pred = self._get_output_single(
-                feats[idx], idx, is_eval=is_eval, batch_size=batch_size)
+                feats[idx], idx, is_eval=is_eval)
             cate_pred_list.append(cate_pred)
             kernel_pred_list.append(kernel_pred)

         return cate_pred_list, kernel_pred_list

-    def _get_output_single(self, input, idx, is_eval=False, batch_size=1):
+    def _get_output_single(self, input, idx, is_eval=False):
         ins_kernel_feat = input
         # CoordConv
         x_range = paddle.linspace(
@@ -200,8 +207,10 @@ class SOLOv2Head(object):
         y, x = paddle.tensor.meshgrid([y_range, x_range])
         x = fluid.layers.unsqueeze(x, [0, 1])
         y = fluid.layers.unsqueeze(y, [0, 1])
-        y = fluid.layers.expand(y, expand_times=[batch_size, 1, 1, 1])
-        x = fluid.layers.expand(x, expand_times=[batch_size, 1, 1, 1])
+        y = fluid.layers.expand(
+            y, expand_times=[fluid.layers.shape(ins_kernel_feat)[0], 1, 1, 1])
+        x = fluid.layers.expand(
+            x, expand_times=[fluid.layers.shape(ins_kernel_feat)[0], 1, 1, 1])
         coord_feat = fluid.layers.concat([x, y], axis=1)
         ins_kernel_feat = fluid.layers.concat(
             [ins_kernel_feat, coord_feat], axis=1)
@@ -220,6 +229,7 @@ class SOLOv2Head(object):
         kernel_pred = self._conv_pred(
             kernel_feat,
             self.kernel_out_channels,
+            is_eval,
             name='bbox_head.kernel_convs',
             name_feat='bbox_head.solo_kernel')

@@ -227,6 +237,7 @@ class SOLOv2Head(object):
         cate_pred = self._conv_pred(
             cate_feat,
             self.cate_out_channels,
+            is_eval,
             name='bbox_head.cate_convs',
             name_feat='bbox_head.solo_cate')

@@ -236,16 +247,8 @@ class SOLOv2Head(object):
             cate_pred = fluid.layers.transpose(cate_pred, [0, 2, 3, 1])
         return cate_pred, kernel_pred

-    def get_loss(self,
-                 cate_preds,
-                 kernel_preds,
-                 ins_pred,
-                 ins_labels,
-                 cate_labels,
-                 grid_order_list,
-                 fg_num,
-                 grid_offset,
-                 batch_size=1):
+    def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
+                 cate_labels, grid_order_list, fg_num):
         """
         Get loss of network of SOLOv2.

@@ -257,56 +260,49 @@ class SOLOv2Head(object):
             cate_labels (list): List of categroy labels pre batch.
             grid_order_list (list): List of index in pre grid.
             fg_num (int): Number of positive samples in a mini-batch.
-            grid_offset (list): List of offset of pre grid.
-            batch_size: Batch size.
         Returns:
             loss_ins (Variable): The instance loss Variable of SOLOv2 network.
             loss_cate (Variable): The category loss Variable of SOLOv2 network.
         """
         new_kernel_preds = []
-        grid_offset_list = fluid.layers.split(
-            grid_offset, num_or_sections=len(grid_order_list), dim=1)
-        pred_weight_list = []
-        for kernel_preds_level, grid_orders_level, grid_offset_level in zip(
-                kernel_preds, grid_order_list, grid_offset_list):
-            tmp_list = []
-            kernel_pred_weight = []
-            start_order_num = fluid.layers.zeros(shape=[1], dtype='int32')
-            for i in range(batch_size):
-                reshape_pred = fluid.layers.reshape(
-                    kernel_preds_level[i],
-                    shape=(int(kernel_preds_level[i].shape[0]), -1))
-                end_order_num = start_order_num + grid_offset_level[i]
-                grid_order_img = fluid.layers.slice(
-                    grid_orders_level,
-                    axes=[0],
-                    starts=[start_order_num],
-                    ends=[end_order_num])
-                start_order_num = end_order_num
-                reshape_pred = fluid.layers.transpose(reshape_pred, [1, 0])
-                reshape_pred = fluid.layers.gather(
-                    reshape_pred, index=grid_order_img)
-                reshape_pred = fluid.layers.transpose(reshape_pred, [1, 0])
-                tmp_list.append(reshape_pred)
-            new_kernel_preds.append(tmp_list)
+        pad_length_list = []
+        for kernel_preds_level, grid_orders_level in zip(kernel_preds,
+                                                         grid_order_list):
+            reshape_pred = fluid.layers.reshape(
+                kernel_preds_level,
+                shape=(fluid.layers.shape(kernel_preds_level)[0],
+                       fluid.layers.shape(kernel_preds_level)[1], -1))
+            reshape_pred = fluid.layers.transpose(reshape_pred, [0, 2, 1])
+            reshape_pred = fluid.layers.reshape(
+                reshape_pred, shape=(-1, fluid.layers.shape(reshape_pred)[2]))
+            gathered_pred = fluid.layers.gather(
+                reshape_pred, index=grid_orders_level)
+            gathered_pred = fluid.layers.lod_reset(gathered_pred,
+                                                   grid_orders_level)
+            pad_value = fluid.layers.assign(input=np.array(
+                [0.0], dtype=np.float32))
+            pad_pred, pad_length = fluid.layers.sequence_pad(
+                gathered_pred, pad_value=pad_value)
+            new_kernel_preds.append(pad_pred)
+            pad_length_list.append(pad_length)

         # generate masks
         ins_pred_list = []
-        for b_kernel_pred in new_kernel_preds:
-            b_mask_pred = []
-            for idx, kernel_pred in enumerate(b_kernel_pred):
-                cur_ins_pred = ins_pred[idx]
-                cur_ins_pred = fluid.layers.unsqueeze(cur_ins_pred, 0)
-                kernel_pred = fluid.layers.transpose(kernel_pred, [1, 0])
-                kernel_pred = fluid.layers.unsqueeze(kernel_pred, [2, 3])
-
-                ins_pred_conv = paddle.nn.functional.conv2d(cur_ins_pred,
-                                                            kernel_pred)
-                cur_ins_pred = ins_pred_conv[0]
-                b_mask_pred.append(cur_ins_pred)
-
-            b_mask_pred = fluid.layers.concat(b_mask_pred, axis=0)
-            ins_pred_list.append(b_mask_pred)
+        for kernel_pred, pad_length in zip(new_kernel_preds, pad_length_list):
+            cur_ins_pred = ins_pred
+            cur_ins_pred = fluid.layers.reshape(
+                cur_ins_pred,
+                shape=(fluid.layers.shape(cur_ins_pred)[0],
+                       fluid.layers.shape(cur_ins_pred)[1], -1))
+            ins_pred_conv = paddle.matmul(kernel_pred, cur_ins_pred)
+            cur_ins_pred = fluid.layers.reshape(
+                ins_pred_conv,
+                shape=(fluid.layers.shape(ins_pred_conv)[0],
+                       fluid.layers.shape(ins_pred_conv)[1],
+                       fluid.layers.shape(ins_pred)[-2],
+                       fluid.layers.shape(ins_pred)[-1]))
+            cur_ins_pred = fluid.layers.sequence_unpad(cur_ins_pred, pad_length)
+            ins_pred_list.append(cur_ins_pred)

         num_ins = fluid.layers.reduce_sum(fg_num)
diff --git a/ppdet/modeling/architectures/solov2.py b/ppdet/modeling/architectures/solov2.py
index 76ac0a1419ef732aed3c86f9ab27117c5771dd74..86880bb59f4c154ee82ed2dbea211418d6d32dfd 100644
--- a/ppdet/modeling/architectures/solov2.py
+++ b/ppdet/modeling/architectures/solov2.py
@@ -36,25 +36,21 @@ class SOLOv2(object):
         fpn (object): feature pyramid network instance
         bbox_head (object): an `SOLOv2Head` instance
         mask_head (object): an `SOLOv2MaskHead` instance
-        train_batch_size (int): training batch size.
     """

     __category__ = 'architecture'
     __inject__ = ['backbone', 'fpn', 'bbox_head', 'mask_head']
-    __shared__ = ['train_batch_size']

     def __init__(self,
                  backbone,
                  fpn=None,
                  bbox_head='SOLOv2Head',
-                 mask_head='SOLOv2MaskHead',
-                 train_batch_size=1):
+                 mask_head='SOLOv2MaskHead'):
         super(SOLOv2, self).__init__()
         self.backbone = backbone
         self.fpn = fpn
         self.bbox_head = bbox_head
         self.mask_head = mask_head
-        self.train_batch_size = train_batch_size

     def build(self, feed_vars, mode='train'):
         im = feed_vars['image']
@@ -78,19 +74,13 @@ class SOLOv2(object):
         if mixed_precision_enabled:
             body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]

-        if not mode == 'train':
-            self.batch_size = 1
-        else:
-            self.batch_size = self.train_batch_size
-
-        mask_feat_pred = self.mask_head.get_output(body_feats, self.batch_size)
+        mask_feat_pred = self.mask_head.get_output(body_feats)

         if mode == 'train':
             ins_labels = []
             cate_labels = []
             grid_orders = []
             fg_num = feed_vars['fg_num']
-            grid_offset = feed_vars['grid_offset']

             for i in range(5):
                 ins_label = 'ins_label{}'.format(i)
@@ -103,20 +93,17 @@ class SOLOv2(object):
                 if grid_order in feed_vars:
                     grid_orders.append(feed_vars[grid_order])

-            cate_preds, kernel_preds = self.bbox_head.get_outputs(
-                body_feats, batch_size=self.batch_size)
+            cate_preds, kernel_preds = self.bbox_head.get_outputs(body_feats)

             losses = self.bbox_head.get_loss(cate_preds, kernel_preds,
                                              mask_feat_pred, ins_labels,
-                                             cate_labels, grid_orders, fg_num,
-                                             grid_offset, self.train_batch_size)
+                                             cate_labels, grid_orders, fg_num)
             total_loss = fluid.layers.sum(list(losses.values()))
             losses.update({'loss': total_loss})
             return losses
         else:
             im_info = feed_vars['im_info']
-            outs = self.bbox_head.get_outputs(
-                body_feats, is_eval=True, batch_size=self.batch_size)
+            outs = self.bbox_head.get_outputs(body_feats, is_eval=True)
             seg_inputs = outs + (mask_feat_pred, im_info)
             return self.bbox_head.get_prediction(*seg_inputs)

@@ -148,7 +135,6 @@ class SOLOv2(object):
             'grid_order3': {'shape': [None], 'dtype': 'int32', 'lod_level': 1},
             'grid_order4': {'shape': [None], 'dtype': 'int32', 'lod_level': 1},
             'fg_num': {'shape': [None], 'dtype': 'int32', 'lod_level': 0},
-            'grid_offset': {'shape': [None, 5], 'dtype': 'int32', 'lod_level': 0},
         }
         # yapf: enable
         inputs_def.update(targets_def)
@@ -163,7 +149,7 @@ class SOLOv2(object):
         inputs_def = self._inputs_def(image_shape, fields)
         if 'gt_segm' in fields:
             fields.remove('gt_segm')
-            fields.extend(['fg_num', 'grid_offset'])
+            fields.extend(['fg_num'])
             for i in range(5):
                 fields.extend([
                     'ins_label%d' % i, 'cate_label%d' % i, 'grid_order%d' % i
diff --git a/ppdet/modeling/mask_head/solo_mask_head.py b/ppdet/modeling/mask_head/solo_mask_head.py
index 58bcb5531e8b670ae174cc5a015fd6ecaa2cadc5..f37031b39d0ae8c7625c9efc44378e0a51482e14 100644
--- a/ppdet/modeling/mask_head/solo_mask_head.py
+++ b/ppdet/modeling/mask_head/solo_mask_head.py
@@ -113,13 +113,12 @@ class SOLOv2MaskHead(object):

         return conv_feat

-    def get_output(self, inputs, batch_size=1):
+    def get_output(self, inputs):
         """
         Get SOLOv2MaskHead output.

         Args:
             inputs(list[Variable]): feature map from each necks with shape of [N, C, H, W]
-            batch_size (int): batch size
         Returns:
             ins_pred(Variable): Output of SOLOv2MaskHead head
         """
@@ -137,8 +136,12 @@ class SOLOv2MaskHead(object):
                 y, x = paddle.tensor.meshgrid([y_range, x_range])
                 x = fluid.layers.unsqueeze(x, [0, 1])
                 y = fluid.layers.unsqueeze(y, [0, 1])
-                y = fluid.layers.expand(y, expand_times=[batch_size, 1, 1, 1])
-                x = fluid.layers.expand(x, expand_times=[batch_size, 1, 1, 1])
+                y = fluid.layers.expand(
+                    y,
+                    expand_times=[fluid.layers.shape(input_feat)[0], 1, 1, 1])
+                x = fluid.layers.expand(
+                    x,
+                    expand_times=[fluid.layers.shape(input_feat)[0], 1, 1, 1])
                 coord_feat = fluid.layers.concat([x, y], axis=1)
                 input_p = fluid.layers.concat([input_p, coord_feat], axis=1)
             feature_add_all_level = fluid.layers.elementwise_add(
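
Note on the core indexing change in this patch: `Gt2Solov2Target` now emits grid orders that are pre-offset into a batch-flattened index space (`sample_id * num_grid * num_grid + label`), which is what lets `SOLOv2Head.get_loss` replace the per-image slice-and-gather loop with a single `gather` over the whole batch; the LoD attached via `lod_reset` then drives `sequence_pad`/`sequence_unpad` to regroup rows per image. Below is a minimal NumPy sketch of that indexing; the shapes and values are illustrative, not the actual PaddleDetection tensors:

```
import numpy as np

batch_size, channels, num_grid = 2, 8, 4  # illustrative shapes only

# Kernel-branch output for one FPN level: [N, C, G, G].
kernel_preds = np.random.randn(batch_size, channels, num_grid, num_grid)

# Flatten the way the reworked get_loss does:
# [N, C, G*G] -> [N, G*G, C] -> [N*G*G, C].
flat = kernel_preds.reshape(batch_size, channels, -1)
flat = flat.transpose(0, 2, 1).reshape(-1, channels)

# Grid orders as produced by Gt2Solov2Target after this change: each
# positive grid-cell index is offset by sample_id * num_grid * num_grid.
grid_orders = np.array([
    0 * num_grid * num_grid + 5,  # sample 0, grid cell 5
    1 * num_grid * num_grid + 3,  # sample 1, grid cell 3
])

# A single gather now covers the whole batch (the fluid.layers.gather step).
gathered = flat[grid_orders]

# Each row is the kernel vector at (h, w) = divmod(cell, num_grid).
assert np.allclose(gathered[0], kernel_preds[0, :, 1, 1])  # cell 5 -> (1, 1)
assert np.allclose(gathered[1], kernel_preds[1, :, 0, 3])  # cell 3 -> (0, 3)
```

Because the offsets make the batched gather equivalent to the old per-image slicing, the static `batch_size` no longer needs to be threaded through `get_outputs`, `get_loss`, and `SOLOv2MaskHead.get_output`, and `paddle.matmul` on the padded kernels reproduces the former per-instance 1x1 `conv2d`.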