diff --git a/configs/solov2/solov2_r50_fpn_3x.yml b/configs/solov2/solov2_r50_fpn_3x.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ee1b38eba070377c732ced4440fce319bdabc274
--- /dev/null
+++ b/configs/solov2/solov2_r50_fpn_3x.yml
@@ -0,0 +1,90 @@
+architecture: SOLOv2
+use_gpu: true
+max_iters: 270000
+snapshot_iter: 30000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
+metric: COCO
+weights: output/solov2/solov2_r50_fpn_3x/model_final
+num_classes: 81
+
+SOLOv2:
+  backbone: ResNet
+  fpn: FPN
+  bbox_head: SOLOv2Head
+  mask_head: SOLOv2MaskHead
+  batch_size: 2
+
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+  reverse_out: True
+
+SOLOv2Head:
+  seg_feat_channels: 512
+  stacked_convs: 4
+  num_grids: [40, 36, 24, 16, 12]
+  kernel_out_channels: 256
+
+SOLOv2MaskHead:
+  out_channels: 128
+  start_level: 0
+  end_level: 3
+  num_classes: 256
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [180000, 240000]
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+_READER_: 'solov2_reader.yml'
+TrainReader:
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !Poly2Mask {}
+  - !ResizeImage
+    target_size: [640, 672, 704, 736, 768, 800]
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+    resize_box: true
+  - !RandomFlipImage
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  - !Gt2Solov2Target
+    num_grids: [40, 36, 24, 16, 12]
+    scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]]
+    coord_sigma: 0.2
diff --git a/ppdet/modeling/architectures/solov2.py b/ppdet/modeling/architectures/solov2.py
index 756a269c4caea164e3c16a554d204c54428becfe..76ac0a1419ef732aed3c86f9ab27117c5771dd74 100644
--- a/ppdet/modeling/architectures/solov2.py
+++ b/ppdet/modeling/architectures/solov2.py
@@ -36,24 +36,25 @@ class SOLOv2(object):
         fpn (object): feature pyramid network instance
         bbox_head (object): an `SOLOv2Head` instance
         mask_head (object): an `SOLOv2MaskHead` instance
-        batch_size (int): batch size.
+        train_batch_size (int): training batch size.
     """
 
     __category__ = 'architecture'
     __inject__ = ['backbone', 'fpn', 'bbox_head', 'mask_head']
+    __shared__ = ['train_batch_size']
 
     def __init__(self,
                  backbone,
                  fpn=None,
                  bbox_head='SOLOv2Head',
                  mask_head='SOLOv2MaskHead',
-                 batch_size=1):
+                 train_batch_size=1):
         super(SOLOv2, self).__init__()
         self.backbone = backbone
         self.fpn = fpn
         self.bbox_head = bbox_head
         self.mask_head = mask_head
-        self.batch_size = batch_size
+        self.train_batch_size = train_batch_size
 
     def build(self, feed_vars, mode='train'):
         im = feed_vars['image']
@@ -79,6 +80,8 @@ class SOLOv2(object):
 
         if not mode == 'train':
             self.batch_size = 1
+        else:
+            self.batch_size = self.train_batch_size
 
         mask_feat_pred = self.mask_head.get_output(body_feats, self.batch_size)
 
@@ -103,9 +106,10 @@ class SOLOv2(object):
             cate_preds, kernel_preds = self.bbox_head.get_outputs(
                 body_feats, batch_size=self.batch_size)
 
-            losses = self.bbox_head.get_loss(
-                cate_preds, kernel_preds, mask_feat_pred, ins_labels,
-                cate_labels, grid_orders, fg_num, grid_offset, self.batch_size)
+            losses = self.bbox_head.get_loss(cate_preds, kernel_preds,
+                                             mask_feat_pred, ins_labels,
+                                             cate_labels, grid_orders, fg_num,
+                                             grid_offset, self.train_batch_size)
             total_loss = fluid.layers.sum(list(losses.values()))
             losses.update({'loss': total_loss})
             return losses