From e9d7f8a89a8e2cd7b75d212c0bbbe99d9bbd8693 Mon Sep 17 00:00:00 2001
From: FDInSky <48318485+FDInSky@users.noreply.github.com>
Date: Fri, 19 Jun 2020 14:22:18 +0800
Subject: [PATCH] add rcnn's modeling part (#862)

add Faster RCNN / Mask RCNN / Cascade RCNN model arch, module and op
add base debug func in BufferDict
---
 ppdet/modeling/__init__.py                 |  11 +
 ppdet/modeling/anchor.py                   | 236 +++++++++++++
 ppdet/modeling/architecture/__init__.py    |  14 +
 ppdet/modeling/architecture/faster_rcnn.py |  97 ++++++
 ppdet/modeling/architecture/mask_rcnn.py   | 121 +++++++
 ppdet/modeling/architecture/meta_arch.py   |  36 ++
 ppdet/modeling/backbone/__init__.py        |   3 +
 ppdet/modeling/backbone/resnet.py          | 276 +++++++++++++++
 ppdet/modeling/head/__init__.py            |   7 +
 ppdet/modeling/head/bbox_head.py           | 137 ++++++++
 ppdet/modeling/head/mask_head.py           | 128 +++++++
 ppdet/modeling/head/rpn_head.py            | 109 ++++++
 ppdet/modeling/ops.py                      | 372 +++++++++++++++++++++
 ppdet/utils/data_structure.py              |  45 +++
 14 files changed, 1592 insertions(+)
 create mode 100644 ppdet/modeling/__init__.py
 create mode 100644 ppdet/modeling/anchor.py
 create mode 100644 ppdet/modeling/architecture/__init__.py
 create mode 100644 ppdet/modeling/architecture/faster_rcnn.py
 create mode 100644 ppdet/modeling/architecture/mask_rcnn.py
 create mode 100644 ppdet/modeling/architecture/meta_arch.py
 create mode 100644 ppdet/modeling/backbone/__init__.py
 create mode 100755 ppdet/modeling/backbone/resnet.py
 create mode 100644 ppdet/modeling/head/__init__.py
 create mode 100644 ppdet/modeling/head/bbox_head.py
 create mode 100644 ppdet/modeling/head/mask_head.py
 create mode 100644 ppdet/modeling/head/rpn_head.py
 create mode 100644 ppdet/modeling/ops.py
 create mode 100644 ppdet/utils/data_structure.py

diff --git a/ppdet/modeling/__init__.py b/ppdet/modeling/__init__.py
new file mode 100644
index 000000000..5f344103e
--- /dev/null
+++ b/ppdet/modeling/__init__.py
@@ -0,0 +1,11 @@
+from . import architecture
+from . import backbone
+from . import head
+from . import ops
+from .
import anchor + +from .architecture import * +from .backbone import * +from .head import * +from .ops import * +from .anchor import * diff --git a/ppdet/modeling/anchor.py b/ppdet/modeling/anchor.py new file mode 100644 index 000000000..8abe04b45 --- /dev/null +++ b/ppdet/modeling/anchor.py @@ -0,0 +1,236 @@ +import numpy as np + +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph.base import to_variable + +from ppdet.core.workspace import register +from ppdet.modeling.ops import (AnchorGenerator, RPNAnchorTargetGenerator, + ProposalGenerator, ProposalTargetGenerator, + MaskTargetGenerator, DecodeClipNms) +# TODO: modify here into ppdet.modeling.ops like DecodeClipNms +from ppdet.py_op.post_process import mask_post_process + + +@register +class BBoxPostProcess(Layer): + def __init__(self, + decode=None, + clip=None, + nms=None, + decode_clip_nms=DecodeClipNms().__dict__): + super(BBoxPostProcess, self).__init__() + self.decode = decode + self.clip = clip + self.nms = nms + self.decode_clip_nms = decode_clip_nms + if isinstance(decode_clip_nms, dict): + self.decode_clip_nms = DecodeClipNms(**decode_clip_nms) + + def __call__(self, inputs): + # TODO: split into 3 steps + # TODO: modify related ops for deploying + # decode + # clip + # nms + outs = self.decode_clip_nms(inputs['rpn_rois'], inputs['bbox_prob'], + inputs['bbox_delta'], inputs['im_info']) + outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]} + return outs + + +@register +class MaskPostProcess(object): + __shared__ = ['num_classes'] + + def __init__(self, num_classes=81): + super(MaskPostProcess, self).__init__() + self.num_classes = num_classes + + def __call__(self, inputs): + # TODO: modify related ops for deploying + outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(), + inputs['predicted_bbox'].numpy(), + inputs['mask_logits'].numpy(), + inputs['im_info'].numpy()) + outs = {'predicted_mask': outs} + return outs + + +@register +class Anchor(object): + __inject__ = ['anchor_generator', 'anchor_target_generator'] + + def __init__(self, + anchor_type='rpn', + anchor_generator=AnchorGenerator().__dict__, + anchor_target_generator=RPNAnchorTargetGenerator().__dict__): + super(Anchor, self).__init__() + self.anchor_generator = anchor_generator + self.anchor_target_generator = anchor_target_generator + if isinstance(anchor_generator, dict): + self.anchor_generator = AnchorGenerator(**anchor_generator) + if isinstance(anchor_target_generator, dict): + self.anchor_target_generator = RPNAnchorTargetGenerator( + **anchor_target_generator) + + def __call__(self, inputs): + outs = self.generate_anchors(inputs) + return outs + + def generate_anchors(self, inputs): + # TODO: update here to use int to specify featmap size + outs = self.anchor_generator(inputs['rpn_feat']) + outs = {'anchor': outs[0], 'var': outs[1], 'anchor_module': self} + return outs + + def generate_anchors_target(self, inputs): + # TODO: add yolo anchor targets + rpn_rois_score = fluid.layers.transpose( + inputs['rpn_rois_score'], perm=[0, 2, 3, 1]) + rpn_rois_delta = fluid.layers.transpose( + inputs['rpn_rois_delta'], perm=[0, 2, 3, 1]) + rpn_rois_score = fluid.layers.reshape( + x=rpn_rois_score, shape=(0, -1, 1)) + rpn_rois_delta = fluid.layers.reshape( + x=rpn_rois_delta, shape=(0, -1, 4)) + + anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4)) + #var = fluid.layers.reshape(inputs['var'], shape=(-1, 4)) + + score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = 
self.anchor_target_generator( + bbox_pred=rpn_rois_delta, + cls_logits=rpn_rois_score, + anchor_box=anchor, + gt_boxes=inputs['gt_bbox'], + is_crowd=inputs['is_crowd'], + im_info=inputs['im_info']) + outs = { + 'rpn_score_pred': score_pred, + 'rpn_score_target': score_tgt, + 'rpn_rois_pred': roi_pred, + 'rpn_rois_target': roi_tgt, + 'rpn_rois_weight': roi_weight + } + return outs + + def post_process(self, ): + # TODO: whether move bbox post process to here + pass + + +@register +class Proposal(object): + __inject__ = [ + 'proposal_generator', 'proposal_target_generator', 'bbox_post_process' + ] + + def __init__( + self, + proposal_generator=ProposalGenerator().__dict__, + proposal_target_generator=ProposalTargetGenerator().__dict__, + bbox_post_process=BBoxPostProcess().__dict__, ): + super(Proposal, self).__init__() + self.proposal_generator = proposal_generator + self.proposal_target_generator = proposal_target_generator + self.bbox_post_process = bbox_post_process + if isinstance(proposal_generator, dict): + self.proposal_generator = ProposalGenerator(**proposal_generator) + if isinstance(proposal_target_generator, dict): + self.proposal_target_generator = ProposalTargetGenerator( + **proposal_target_generator) + if isinstance(bbox_post_process, dict): + self.bbox_post_process = BBoxPostProcess(**bbox_post_process) + + def __call__(self, inputs, stage=0): + outs = {} + if stage == 0: + proposal_out = self.generate_proposal(inputs) + inputs.update(proposal_out) + if inputs['mode'] == 'train': + proposal_target_out = self.generate_proposal_target(inputs, stage) + outs.update(proposal_target_out) + return outs + + def generate_proposal(self, inputs): + rpn_rois_prob = fluid.layers.sigmoid( + inputs['rpn_rois_score'], name='rpn_rois_prob') + outs = self.proposal_generator( + scores=rpn_rois_prob, + bbox_deltas=inputs['rpn_rois_delta'], + anchors=inputs['anchor'], + variances=inputs['var'], + im_info=inputs['im_info'], + mode=inputs['mode']) + outs = { + 'rpn_rois': outs[0], + 'rpn_rois_probs': outs[1], + 'rpn_rois_nums': outs[2] + } + return outs + + def generate_proposal_target(self, inputs, stage=0): + outs = self.proposal_target_generator( + rpn_rois=inputs['rpn_rois'], + rpn_rois_nums=inputs['rpn_rois_nums'], + gt_classes=inputs['gt_class'], + is_crowd=inputs['is_crowd'], + gt_boxes=inputs['gt_bbox'], + im_info=inputs['im_info'], + stage=stage) + outs = { + 'rois': outs[0], + 'labels_int32': outs[1], + 'bbox_targets': outs[2], + 'bbox_inside_weights': outs[3], + 'bbox_outside_weights': outs[4], + 'rois_nums': outs[5] + } + return outs + + def post_process(self, inputs): + outs = self.bbox_post_process(inputs) + return outs + + +@register +class Mask(object): + __inject__ = ['mask_target_generator', 'mask_post_process'] + + def __init__(self, + mask_target_generator=MaskTargetGenerator().__dict__, + mask_post_process=MaskPostProcess().__dict__): + super(Mask, self).__init__() + self.mask_target_generator = mask_target_generator + self.mask_post_process = mask_post_process + if isinstance(mask_target_generator, dict): + self.mask_target_generator = MaskTargetGenerator( + **mask_target_generator) + if isinstance(mask_post_process, dict): + self.mask_post_process = MaskPostProcess(**mask_post_process) + + def __call__(self, inputs): + outs = {} + if inputs['mode'] == 'train': + outs = self.generate_mask_target(inputs) + return outs + + def generate_mask_target(self, inputs): + outs = self.mask_target_generator( + im_info=inputs['im_info'], + gt_classes=inputs['gt_class'], + 
is_crowd=inputs['is_crowd'], + gt_segms=inputs['gt_mask'], + rois=inputs['rois'], + rois_nums=inputs['rois_nums'], + labels_int32=inputs['labels_int32'], ) + outs = { + 'mask_rois': outs[0], + 'rois_has_mask_int32': outs[1], + 'mask_int32': outs[2] + } + return outs + + def post_process(self, inputs): + outs = self.mask_post_process(inputs) + return outs diff --git a/ppdet/modeling/architecture/__init__.py b/ppdet/modeling/architecture/__init__.py new file mode 100644 index 000000000..70be4e9c7 --- /dev/null +++ b/ppdet/modeling/architecture/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +from . import meta_arch +from . import faster_rcnn +from . import mask_rcnn + +from .meta_arch import * +from .faster_rcnn import * +from .mask_rcnn import * diff --git a/ppdet/modeling/architecture/faster_rcnn.py b/ppdet/modeling/architecture/faster_rcnn.py new file mode 100644 index 000000000..c91c3a591 --- /dev/null +++ b/ppdet/modeling/architecture/faster_rcnn.py @@ -0,0 +1,97 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid + +from ppdet.core.workspace import register +from .meta_arch import BaseArch + +__all__ = ['FasterRCNN'] + + +@register +class FasterRCNN(BaseArch): + __category__ = 'architecture' + __inject__ = [ + 'anchor', + 'proposal', + 'backbone', + 'rpn_head', + 'bbox_head', + ] + + def __init__(self, + anchor, + proposal, + backbone, + rpn_head, + bbox_head, + rpn_only=False): + super(FasterRCNN, self).__init__() + self.anchor = anchor + self.proposal = proposal + self.backbone = backbone + self.rpn_head = rpn_head + self.bbox_head = bbox_head + self.rpn_only = rpn_only + + def forward(self, inputs, inputs_keys, mode='train'): + self.gbd = self.build_inputs(inputs, inputs_keys) + self.gbd['mode'] = mode + + # Backbone + bb_out = self.backbone(self.gbd) + self.gbd.update(bb_out) + + # RPN + rpn_head_out = self.rpn_head(self.gbd) + self.gbd.update(rpn_head_out) + + # Anchor + anchor_out = self.anchor(self.gbd) + self.gbd.update(anchor_out) + + # Proposal BBox + proposal_out = self.proposal(self.gbd) + self.gbd.update(proposal_out) + + # BBox Head + bbox_head_out = self.bbox_head(self.gbd) + self.gbd.update(bbox_head_out) + + if self.gbd['mode'] == 'infer': + bbox_out = self.proposal.post_process(self.gbd) + self.gbd.update(bbox_out) + + # result + if self.gbd['mode'] == 'train': + return self.loss(self.gbd) + elif self.gbd['mode'] == 'infer': + return self.infer(self.gbd) + else: + raise "Now, only support train or infer mode!" 
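The Faster R-CNN forward pass above threads a single BufferDict through backbone, RPN head, anchor, proposal, and bbox head; each stage returns a small dict that is merged back into the shared buffer before the next stage reads from it. A minimal standalone sketch of that accumulate-and-update flow, using a plain dict and hypothetical numpy stage functions rather than the real ppdet modules:

import numpy as np

def backbone(gbd):
    # stand-in feature extractor: produces a fake 'res4' feature map
    return {'res4': np.zeros((1, 1024, 38, 50), dtype='float32')}

def rpn_head(gbd):
    # stand-in RPN head: per-location objectness scores and box deltas
    h, w = gbd['res4'].shape[2:]
    return {'rpn_rois_score': np.zeros((1, 15, h, w), dtype='float32'),
            'rpn_rois_delta': np.zeros((1, 60, h, w), dtype='float32')}

def forward(inputs, mode='train'):
    gbd = dict(inputs)                    # shared buffer, like the BufferDict above
    gbd['mode'] = mode
    for stage in (backbone, rpn_head):    # the real model chains anchor, proposal,
        gbd.update(stage(gbd))            # bbox_head (and mask) the same way
    return gbd

out = forward({'image': np.zeros((1, 3, 600, 800), dtype='float32')})
print(sorted(out.keys()))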
+ + def loss(self, inputs): + losses = [] + rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs) + losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss] + loss = fluid.layers.sum(losses) + out = { + 'loss': loss, + 'loss_rpn_cls': rpn_cls_loss, + 'loss_rpn_reg': rpn_reg_loss, + 'loss_bbox_cls': bbox_cls_loss, + 'loss_bbox_reg': bbox_reg_loss, + } + return out + + def infer(self, inputs): + outs = { + "bbox_nums": inputs['predicted_bbox_nums'].numpy(), + "bbox": inputs['predicted_bbox'].numpy(), + 'im_id': inputs['im_id'].numpy(), + 'im_shape': inputs['im_shape'].numpy() + } + return outs diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py new file mode 100644 index 000000000..0002afc3b --- /dev/null +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -0,0 +1,121 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid + +from ppdet.core.workspace import register +from ppdet.utils.data_structure import BufferDict + +from .meta_arch import BaseArch +__all__ = ['MaskRCNN'] + + +@register +class MaskRCNN(BaseArch): + __category__ = 'architecture' + __inject__ = [ + 'anchor', + 'proposal', + 'mask', + 'backbone', + 'rpn_head', + 'bbox_head', + 'mask_head', + ] + + def __init__(self, + anchor, + proposal, + mask, + backbone, + rpn_head, + bbox_head, + mask_head, + rpn_only=False): + super(MaskRCNN, self).__init__() + + self.anchor = anchor + self.proposal = proposal + self.mask = mask + self.backbone = backbone + self.rpn_head = rpn_head + self.bbox_head = bbox_head + self.mask_head = mask_head + + def forward(self, inputs, inputs_keys, mode='train'): + self.gbd = self.build_inputs(inputs, inputs_keys) + self.gbd['mode'] = mode + + # Backbone + bb_out = self.backbone(self.gbd) + self.gbd.update(bb_out) + + # RPN + rpn_head_out = self.rpn_head(self.gbd) + self.gbd.update(rpn_head_out) + + # Anchor + anchor_out = self.anchor(self.gbd) + self.gbd.update(anchor_out) + + # Proposal BBox + proposal_out = self.proposal(self.gbd) + self.gbd.update(proposal_out) + + # BBox Head + bbox_head_out = self.bbox_head(self.gbd) + self.gbd.update(bbox_head_out) + + if self.gbd['mode'] == 'infer': + bbox_out = self.proposal.post_process(self.gbd) + self.gbd.update(bbox_out) + + # Mask + mask_out = self.mask(self.gbd) + self.gbd.update(mask_out) + + # Mask Head + mask_head_out = self.mask_head(self.gbd) + self.gbd.update(mask_head_out) + + if self.gbd['mode'] == 'infer': + mask_out = self.mask.post_process(self.gbd) + self.gbd.update(mask_out) + + # result + if self.gbd['mode'] == 'train': + return self.loss(self.gbd) + elif self.gbd['mode'] == 'infer': + self.infer(self.gbd) + else: + raise "Now, only support train or infer mode!" 
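Both architectures lean on the same construction idiom used in anchor.py and the heads: a wrapper accepts either a ready-made component or a plain dict of constructor arguments (the SomeOp().__dict__ default) and rebuilds the component when it receives a dict, so config files can hand in keyword dicts directly. A small self-contained illustration of the idiom with made-up stub classes, not the real ppdet ops:

class AnchorGeneratorStub(object):
    # toy stand-in for ppdet's AnchorGenerator, only to show the idiom
    def __init__(self, anchor_sizes=[32, 64, 128, 256, 512], stride=[16.0, 16.0]):
        self.anchor_sizes = anchor_sizes
        self.stride = stride

class AnchorStub(object):
    # mirrors Anchor/Proposal/Mask: accept an instance or a plain kwargs dict
    def __init__(self, anchor_generator=AnchorGeneratorStub().__dict__):
        self.anchor_generator = anchor_generator
        if isinstance(anchor_generator, dict):
            # config systems pass keyword dicts; rebuild the component here
            self.anchor_generator = AnchorGeneratorStub(**anchor_generator)

a = AnchorStub()                                              # built from defaults
b = AnchorStub({'anchor_sizes': [64], 'stride': [8.0, 8.0]})  # built from a config dict
print(a.anchor_generator.anchor_sizes, b.anchor_generator.stride)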
+ + def loss(self, inputs): + losses = [] + rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs) + mask_loss = self.mask_head.loss(inputs) + losses = [ + rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss + ] + loss = fluid.layers.sum(losses) + out = { + 'loss': loss, + 'loss_rpn_cls': rpn_cls_loss, + 'loss_rpn_reg': rpn_reg_loss, + 'loss_bbox_cls': bbox_cls_loss, + 'loss_bbox_reg': bbox_reg_loss, + 'loss_mask': mask_loss + } + return out + + def infer(self, inputs): + outs = { + 'bbox_nums': inputs['predicted_bbox_nums'].numpy(), + 'bbox': inputs['predicted_bbox'].numpy(), + 'mask': inputs['predicted_mask'].numpy(), + 'im_id': inputs['im_id'].numpy(), + 'im_shape': inputs['im_shape'].numpy() + } + return inputs diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py new file mode 100644 index 000000000..ff2a7d93c --- /dev/null +++ b/ppdet/modeling/architecture/meta_arch.py @@ -0,0 +1,36 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from paddle import fluid +from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph.base import to_variable + +from ppdet.core.workspace import register +from ppdet.utils.data_structure import BufferDict + +__all__ = ['BaseArch'] + + +@register +class BaseArch(Layer): + def __init__(self, *args, **kwargs): + super(BaseArch, self).__init__() + + def forward(self, inputs, inputs_keys, mode='train'): + raise NotImplementedError("Should implement forward method!") + + def loss(self, inputs): + raise NotImplementedError("Should implement loss method!") + + def infer(self, inputs): + raise NotImplementedError("Should implement infer method!") + + def build_inputs(self, inputs, inputs_keys): + gbd = BufferDict() + for i, k in enumerate(inputs_keys): + v = to_variable(np.array([x[i] for x in inputs])) + gbd.set(k, v) + return gbd diff --git a/ppdet/modeling/backbone/__init__.py b/ppdet/modeling/backbone/__init__.py new file mode 100644 index 000000000..00399479a --- /dev/null +++ b/ppdet/modeling/backbone/__init__.py @@ -0,0 +1,3 @@ +from . 
import resnet + +from .resnet import * diff --git a/ppdet/modeling/backbone/resnet.py b/ppdet/modeling/backbone/resnet.py new file mode 100755 index 000000000..d8369ffcf --- /dev/null +++ b/ppdet/modeling/backbone/resnet.py @@ -0,0 +1,276 @@ +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer +from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Constant + +from ppdet.core.workspace import register, serializable + + +class ConvBNLayer(Layer): + def __init__(self, + name_scope, + ch_in, + ch_out, + filter_size, + stride, + padding, + act='relu', + learning_rate=1.0): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=ch_in, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=1, + act=act, + param_attr=ParamAttr( + name=name_scope + "_weights", learning_rate=learning_rate), + bias_attr=ParamAttr(name=name_scope + "_bias")) + + if name_scope == "conv1": + bn_name = "bn_" + name_scope + else: + bn_name = "bn" + name_scope[3:] + + self._bn = BatchNorm( + num_channels=ch_out, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + is_test=True) + + def forward(self, inputs): + x = self._conv(inputs) + out = self._bn(x) + return out + + +class ConvAffineLayer(Layer): + def __init__(self, + name_scope, + ch_in, + ch_out, + filter_size, + stride, + padding, + learning_rate=1.0, + act='relu'): + super(ConvAffineLayer, self).__init__() + + self._conv = Conv2D( + num_channels=ch_in, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr( + name=name_scope + "_weights", learning_rate=learning_rate), + bias_attr=False) + + if name_scope == "conv1": + bn_name = "bn_" + name_scope + else: + bn_name = "bn" + name_scope[3:] + self.name_scope = name_scope + + self.scale = fluid.Layer.create_parameter( + shape=[ch_out], + dtype='float32', + attr=ParamAttr( + name=bn_name + '_scale', learning_rate=0.), + default_initializer=Constant(1.)) + self.bias = fluid.layers.create_parameter( + shape=[ch_out], + dtype='float32', + attr=ParamAttr( + bn_name + '_offset', learning_rate=0.), + default_initializer=Constant(0.)) + + self.act = act + + def forward(self, inputs): + conv = self._conv(inputs) + out = fluid.layers.affine_channel( + x=conv, scale=self.scale, bias=self.bias) + if self.act == 'relu': + out = fluid.layers.relu(x=out) + return out + + +class BottleNeck(Layer): + def __init__(self, + name_scope, + ch_in, + ch_out, + stride, + shortcut=True, + learning_rate=1.0): + super(BottleNeck, self).__init__() + + self.shortcut = shortcut + if not shortcut: + self.short = ConvBNLayer( + name_scope + "_branch1", + ch_in=ch_in, + ch_out=ch_out * 4, + filter_size=1, + stride=stride, + padding=0, + act=None, + learning_rate=learning_rate) + + self.conv1 = ConvBNLayer( + name_scope + "_branch2a", + ch_in=ch_in, + ch_out=ch_out, + filter_size=1, + stride=stride, + padding=0, + learning_rate=learning_rate, ) + + self.conv2 = ConvBNLayer( + name_scope + "_branch2b", + ch_in=ch_out, + ch_out=ch_out, + filter_size=3, + stride=1, + padding=1, + learning_rate=learning_rate) + + self.conv3 = ConvBNLayer( + name_scope + "_branch2c", + ch_in=ch_out, + ch_out=ch_out * 4, + filter_size=1, + stride=1, + padding=0, + learning_rate=learning_rate, 
+ act=None) + self.name_scope = name_scope + + def forward(self, inputs): + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + conv1 = self.conv1(inputs) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + out = fluid.layers.elementwise_add( + x=short, + y=conv3, + act='relu', + name=self.name_scope + ".add.output.5") + + return out + + +class Blocks(Layer): + def __init__(self, + name_scope, + ch_in, + ch_out, + count, + stride, + learning_rate=1.0): + super(Blocks, self).__init__() + + self.blocks = [] + for i in range(count): + if i == 0: + name = name_scope + "a" + self.stride = stride + self.shortcut = False + else: + name = name_scope + chr(ord("a") + i) + self.stride = 1 + self.shortcut = True + + block = self.add_sublayer( + name, + BottleNeck( + name, + ch_in=ch_in if i == 0 else ch_out * 4, + ch_out=ch_out, + stride=self.stride, + shortcut=self.shortcut, + learning_rate=learning_rate)) + self.blocks.append(block) + shortcut = True + + def forward(self, inputs): + res_out = self.blocks[0](inputs) + for block in self.blocks[1:]: + res_out = block(res_out) + return res_out + + +@register +@serializable +class ResNet(Layer): + def __init__( + self, + norm_type='bn', + depth=50, + feature_maps=4, + freeze_at=2, ): + super(ResNet, self).__init__() + + if depth == 50: + blocks = [3, 4, 6, 3] + elif depth == 101: + blocks = [3, 4, 23, 3] + elif depth == 152: + blocks = [3, 8, 36, 3] + + self.conv = ConvBNLayer( + "conv1", + ch_in=3, + ch_out=64, + filter_size=7, + stride=2, + padding=3, + learning_rate=0.) + + self.pool2d_max = Pool2D( + pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) + + self.stage2 = Blocks( + "res2", + ch_in=64, + ch_out=64, + count=blocks[0], + stride=1, + learning_rate=0.) + + self.stage3 = Blocks( + "res3", ch_in=256, ch_out=128, count=blocks[1], stride=2) + + self.stage4 = Blocks( + "res4", ch_in=512, ch_out=256, count=blocks[2], stride=2) + + def forward(self, inputs): + x = inputs['image'] + + conv1 = self.conv(x) + poo1 = self.pool2d_max(conv1) + + res2 = self.stage2(poo1) + res2.stop_gradient = True + + res3 = self.stage3(res2) + + res4 = self.stage4(res3) + + outs = {'res2': res2, 'res3': res3, 'res4': res4} + return outs diff --git a/ppdet/modeling/head/__init__.py b/ppdet/modeling/head/__init__.py new file mode 100644 index 000000000..ebe78b61c --- /dev/null +++ b/ppdet/modeling/head/__init__.py @@ -0,0 +1,7 @@ +from . import rpn_head +from . import bbox_head +from . 
import mask_head + +from .rpn_head import * +from .bbox_head import * +from .mask_head import * diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py new file mode 100644 index 000000000..ecd72d1b8 --- /dev/null +++ b/ppdet/modeling/head/bbox_head.py @@ -0,0 +1,137 @@ +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer + +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, MSRA +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.dygraph.nn import Conv2D, Pool2D +from ppdet.core.workspace import register +from ..backbone.resnet import Blocks +from ..ops import RoIExtractor + + +@register +class BBoxFeat(Layer): + __inject__ = ['roi_extractor'] + + def __init__(self, + feat_in=1024, + feat_out=512, + roi_extractor=RoIExtractor().__dict__, + stage=0): + super(BBoxFeat, self).__init__() + self.roi_extractor = roi_extractor + if isinstance(roi_extractor, dict): + self.roi_extractor = RoIExtractor(**roi_extractor) + if stage == 0: + postfix = '' + else: + postfix = '_' + str(stage) + self.res5 = Blocks( + "res5", ch_in=feat_in, ch_out=feat_out, count=3, stride=2) + self.res5_pool = fluid.dygraph.Pool2D( + pool_type='avg', global_pooling=True) + + def forward(self, inputs): + if inputs['mode'] == 'train': + rois = inputs['rois'] + rois_num = inputs['rois_nums'] + elif inputs['mode'] == 'infer': + rois = inputs['rpn_rois'] + rois_num = inputs['rpn_rois_nums'] + else: + raise "BBoxFeat only support train or infer mode!" + + rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num) + # TODO: add others + y_res5 = self.res5(rois_feat) + y = self.res5_pool(y_res5) + y = fluid.layers.squeeze(y, axes=[2, 3]) + outs = { + 'rois_feat': rois_feat, + 'res5': y_res5, + "bbox_feat": y, + 'shared_res5_block': self.res5, + 'shared_roi_extractor': self.roi_extractor + } + return outs + + +@register +class BBoxHead(Layer): + __shared__ = ['num_classes'] + __inject__ = ['bbox_feat'] + + def __init__(self, + in_feat=2048, + num_classes=81, + bbox_feat=BBoxFeat().__dict__, + stage=0): + super(BBoxHead, self).__init__() + self.num_classes = num_classes + self.bbox_feat = bbox_feat + if isinstance(bbox_feat, dict): + self.bbox_feat = BBoxFeat(**bbox_feat) + if stage == 0: + postfix = '' + else: + postfix = '_' + str(stage) + self.bbox_score = fluid.dygraph.Linear( + input_dim=in_feat, + output_dim=1 * self.num_classes, + act=None, + param_attr=ParamAttr( + name='cls_score_w' + postfix, + initializer=Normal( + loc=0.0, scale=0.001)), + bias_attr=ParamAttr( + name='cls_score_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) + + self.bbox_delta = fluid.dygraph.Linear( + input_dim=in_feat, + output_dim=4 * self.num_classes, + act=None, + param_attr=ParamAttr( + name='bbox_pred_w' + postfix, + initializer=Normal( + loc=0.0, scale=0.01)), + bias_attr=ParamAttr( + name='bbox_pred_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) + + def forward(self, inputs): + outs = self.bbox_feat(inputs) + x = outs['bbox_feat'] + bs = self.bbox_score(x) + bd = self.bbox_delta(x) + outs.update({'bbox_score': bs, 'bbox_delta': bd}) + if inputs['mode'] == 'infer': + bbox_prob = fluid.layers.softmax(bs, use_cudnn=False) + outs['bbox_prob'] = bbox_prob + return outs + + def loss(self, inputs): + # bbox cls + labels_int64 = fluid.layers.cast( + x=inputs['labels_int32'], dtype='int64') + labels_int64.stop_gradient = True + bbox_score = fluid.layers.reshape(inputs['bbox_score'], + (-1, self.num_classes)) + loss_bbox_cls = 
fluid.layers.softmax_with_cross_entropy( + logits=bbox_score, label=labels_int64) + loss_bbox_cls = fluid.layers.reduce_mean( + loss_bbox_cls, name='loss_bbox_cls') + # bbox reg + loss_bbox_reg = fluid.layers.smooth_l1( + x=inputs['bbox_delta'], + y=inputs['bbox_targets'], + inside_weight=inputs['bbox_inside_weights'], + outside_weight=inputs['bbox_outside_weights'], + sigma=1.0) + loss_bbox_reg = fluid.layers.reduce_mean( + loss_bbox_reg, name='loss_bbox_loc') + + return loss_bbox_cls, loss_bbox_reg diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py new file mode 100644 index 000000000..1e904c0b0 --- /dev/null +++ b/ppdet/modeling/head/mask_head.py @@ -0,0 +1,128 @@ +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer + +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, MSRA +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.dygraph.nn import Conv2D, Pool2D +from ppdet.core.workspace import register +from ..ops import RoIExtractor +from ..backbone.resnet import Blocks + + +@register +class MaskFeat(Layer): + __inject__ = ['mask_roi_extractor'] + + def __init__(self, + feat_in=2048, + feat_out=256, + mask_roi_extractor=RoIExtractor().__dict__, + stage=0): + super(MaskFeat, self).__init__() + self.feat_in = feat_in + self.feat_out = feat_out + self.mask_roi_extractor = mask_roi_extractor + if isinstance(mask_roi_extractor, dict): + self.mask_roi_extractor = RoIExtractor(**mask_roi_extractor) + if stage == 0: + postfix = '' + else: + postfix = '_' + str(stage) + self.upsample = fluid.dygraph.Conv2DTranspose( + num_channels=self.feat_in, + num_filters=self.feat_out, + filter_size=2, + stride=2, + act='relu', + param_attr=ParamAttr( + name='conv5_mask_w' + postfix, initializer=MSRA(uniform=False)), + bias_attr=ParamAttr( + name='conv5_mask_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) + + def forward(self, inputs): + if inputs['mode'] == 'train': + x = inputs['res5'] + rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32']) + elif inputs['mode'] == 'infer': + rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2] + rois_num = inputs['predicted_bbox_nums'] + # TODO: optim here + if callable(inputs['shared_roi_extractor']): + rois_feat = inputs['shared_roi_extractor'](inputs['res4'], rois, + rois_num) + if callable(inputs['shared_res5_block']): + rois_feat = inputs['shared_res5_block'](rois_feat) + # upsample + y = self.upsample(rois_feat) + outs = {'mask_feat': y} + return outs + + +@register +class MaskHead(Layer): + __shared__ = ['num_classes'] + __inject__ = ['mask_feat'] + + def __init__(self, + feat_in=256, + resolution=14, + num_classes=81, + mask_feat=MaskFeat().__dict__, + stage=0): + super(MaskHead, self).__init__() + self.feat_in = feat_in + self.resolution = resolution + self.num_classes = num_classes + self.mask_feat = mask_feat + if isinstance(mask_feat, dict): + self.mask_feat = MaskFeat(**mask_feat) + if stage == 0: + postfix = '' + else: + postfix = '_' + str(stage) + self.mask_fcn_logits = fluid.dygraph.Conv2D( + num_channels=self.feat_in, + num_filters=self.num_classes, + filter_size=1, + param_attr=ParamAttr( + name='mask_fcn_logits_w' + postfix, + initializer=MSRA(uniform=False)), + bias_attr=ParamAttr( + name='mask_fcn_logits_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.0))) + + def forward(self, inputs): + # feat + outs = self.mask_feat(inputs) + x = outs['mask_feat'] + # logits + mask_logits = self.mask_fcn_logits(x) + if 
inputs['mode'] == 'infer': + pred_bbox = inputs['predicted_bbox'] + shape = reduce((lambda x, y: x * y), pred_bbox.shape) + shape = np.asarray(shape).reshape((1, 1)) + ones = np.ones((1, 1), dtype=np.int32) + cond = (shape == ones).all() + if cond: + mask_logits = pred_bbox + + outs['mask_logits'] = mask_logits + + return outs + + def loss(self, inputs): + # input needs (model_out, target) + reshape_dim = self.num_classes * self.resolution * self.resolution + mask_logits = fluid.layers.reshape(inputs['mask_logits'], + (-1, reshape_dim)) + mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32') + + loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits( + x=mask_logits, label=mask_label, ignore_index=-1, normalize=True) + loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask') + + return loss_mask diff --git a/ppdet/modeling/head/rpn_head.py b/ppdet/modeling/head/rpn_head.py new file mode 100644 index 000000000..9572e8c59 --- /dev/null +++ b/ppdet/modeling/head/rpn_head.py @@ -0,0 +1,109 @@ +import paddle.fluid as fluid +from paddle.fluid.dygraph import Layer +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.dygraph.nn import Conv2D + +from ppdet.core.workspace import register +from ..ops import RPNAnchorTargetGenerator + + +@register +class RPNFeat(Layer): + def __init__(self, feat_in=1024, feat_out=1024): + super(RPNFeat, self).__init__() + self.rpn_conv = fluid.dygraph.Conv2D( + num_channels=1024, + num_filters=1024, + filter_size=3, + stride=1, + padding=1, + act='relu', + param_attr=ParamAttr( + "conv_rpn_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + "conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + + def forward(self, inputs): + x = inputs.get('res4') + y = self.rpn_conv(x) + outs = {'rpn_feat': y} + return outs + + +@register +class RPNHead(Layer): + __inject__ = ['rpn_feat'] + + def __init__(self, anchor_per_position=15, rpn_feat=RPNFeat().__dict__): + super(RPNHead, self).__init__() + self.anchor_per_position = anchor_per_position + self.rpn_feat = rpn_feat + if isinstance(rpn_feat, dict): + self.rpn_feat = RPNFeat(**rpn_feat) + + # rpn roi classification scores + self.rpn_rois_score = fluid.dygraph.Conv2D( + num_channels=1024, + num_filters=1 * self.anchor_per_position, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + name="rpn_cls_logits_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + name="rpn_cls_logits_b", + learning_rate=2., + regularizer=L2Decay(0.))) + + # rpn roi bbox regression deltas + self.rpn_rois_delta = fluid.dygraph.Conv2D( + num_channels=1024, + num_filters=4 * self.anchor_per_position, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + name="rpn_bbox_pred_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + name="rpn_bbox_pred_b", + learning_rate=2., + regularizer=L2Decay(0.))) + + def forward(self, inputs): + outs = self.rpn_feat(inputs) + x = outs['rpn_feat'] + rrs = self.rpn_rois_score(x) + rrd = self.rpn_rois_delta(x) + outs.update({'rpn_rois_score': rrs, 'rpn_rois_delta': rrd}) + return outs + + def loss(self, inputs): + if callable(inputs['anchor_module']): + rpn_targets = inputs['anchor_module'].generate_anchors_target( + inputs) + # cls loss + score_tgt = fluid.layers.cast( + x=rpn_targets['rpn_score_target'], dtype='float32') + rpn_cls_loss = 
fluid.layers.sigmoid_cross_entropy_with_logits( + x=rpn_targets['rpn_score_pred'], label=score_tgt) + rpn_cls_loss = fluid.layers.reduce_mean( + rpn_cls_loss, name='loss_rpn_cls') + + # reg loss + rpn_reg_loss = fluid.layers.smooth_l1( + x=rpn_targets['rpn_rois_pred'], + y=rpn_targets['rpn_rois_target'], + sigma=3.0, + inside_weight=rpn_targets['rpn_rois_weight'], + outside_weight=rpn_targets['rpn_rois_weight']) + rpn_reg_loss = fluid.layers.reduce_mean( + rpn_reg_loss, name='loss_rpn_reg') + + return rpn_cls_loss, rpn_reg_loss diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py new file mode 100644 index 000000000..544fb72aa --- /dev/null +++ b/ppdet/modeling/ops.py @@ -0,0 +1,372 @@ +import numpy as np +from numbers import Integral +import paddle.fluid as fluid +from paddle.fluid.dygraph.base import to_variable +from ppdet.core.workspace import register, serializable +from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target +from ppdet.py_op.post_process import bbox_post_process + + +@register +@serializable +class AnchorGenerator(object): + def __init__(self, + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + stride=[16.0, 16.0], + variance=[1.0, 1.0, 1.0, 1.0]): + super(AnchorGenerator, self).__init__() + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.stride = stride + self.variance = variance + + def __call__(self, inputs): + outs = fluid.layers.anchor_generator( + input=inputs, + anchor_sizes=self.anchor_sizes, + aspect_ratios=self.aspect_ratios, + stride=self.stride, + variance=self.variance) + return outs + + +@register +@serializable +class RPNAnchorTargetGenerator(object): + def __init__(self, + batch_size_per_im=256, + straddle_thresh=0., + fg_fraction=0.5, + positive_overlap=0.7, + negative_overlap=0.3, + use_random=True): + super(RPNAnchorTargetGenerator, self).__init__() + self.batch_size_per_im = batch_size_per_im + self.straddle_thresh = straddle_thresh + self.fg_fraction = fg_fraction + self.positive_overlap = positive_overlap + self.negative_overlap = negative_overlap + self.use_random = use_random + + def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd, + im_info): + anchor_box = anchor_box.numpy() + gt_boxes = gt_boxes.numpy() + is_crowd = is_crowd.numpy() + im_info = im_info.numpy() + + loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( + anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, + self.batch_size_per_im, self.positive_overlap, + self.negative_overlap, self.fg_fraction, self.use_random) + + loc_indexes = to_variable(loc_indexes) + score_indexes = to_variable(score_indexes) + tgt_labels = to_variable(tgt_labels) + tgt_bboxes = to_variable(tgt_bboxes) + bbox_inside_weights = to_variable(bbox_inside_weights) + + loc_indexes.stop_gradient = True + score_indexes.stop_gradient = True + tgt_labels.stop_gradient = True + + cls_logits = fluid.layers.reshape(x=cls_logits, shape=(-1, )) + bbox_pred = fluid.layers.reshape(x=bbox_pred, shape=(-1, 4)) + pred_cls_logits = fluid.layers.gather(cls_logits, score_indexes) + pred_bbox_pred = fluid.layers.gather(bbox_pred, loc_indexes) + + return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights + + +@register +@serializable +class ProposalGenerator(object): + __append_doc__ = True + + def __init__(self, + train_pre_nms_top_n=12000, + train_post_nms_top_n=2000, + infer_pre_nms_top_n=6000, + 
infer_post_nms_top_n=1000, + nms_thresh=.5, + min_size=.1, + eta=1., + return_rois_num=True): + super(ProposalGenerator, self).__init__() + self.train_pre_nms_top_n = train_pre_nms_top_n + self.train_post_nms_top_n = train_post_nms_top_n + self.infer_pre_nms_top_n = infer_pre_nms_top_n + self.infer_post_nms_top_n = infer_post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + self.eta = eta + self.return_rois_num = return_rois_num + + def __call__(self, + scores, + bbox_deltas, + anchors, + variances, + im_info, + mode='train'): + pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n + post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n + outs = fluid.layers.generate_proposals( + scores, + bbox_deltas, + im_info, + anchors, + variances, + pre_nms_top_n=pre_nms_top_n, + post_nms_top_n=post_nms_top_n, + nms_thresh=self.nms_thresh, + min_size=self.min_size, + eta=self.eta, + return_rois_num=self.return_rois_num) + return outs + + +@register +@serializable +class ProposalTargetGenerator(object): + __shared__ = ['num_classes'] + + def __init__(self, + batch_size_per_im=512, + fg_fraction=.25, + fg_thresh=[.5, ], + bg_thresh_hi=[.5, ], + bg_thresh_lo=[0., ], + bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]], + num_classes=81, + shuffle_before_sample=True, + is_cls_agnostic=False, + is_cascade_rcnn=False): + super(ProposalTargetGenerator, self).__init__() + self.batch_size_per_im = batch_size_per_im + self.fg_fraction = fg_fraction + self.fg_thresh = fg_thresh + self.bg_thresh_hi = bg_thresh_hi + self.bg_thresh_lo = bg_thresh_lo + self.bbox_reg_weights = bbox_reg_weights + self.num_classes = num_classes + self.use_random = shuffle_before_sample + self.is_cls_agnostic = is_cls_agnostic, + self.is_cascade_rcnn = is_cascade_rcnn + + def __call__(self, + rpn_rois, + rpn_rois_nums, + gt_classes, + is_crowd, + gt_boxes, + im_info, + stage=0): + rpn_rois = rpn_rois.numpy() + rpn_rois_nums = rpn_rois_nums.numpy() + gt_classes = gt_classes.numpy() + gt_boxes = gt_boxes.numpy() + is_crowd = is_crowd.numpy() + im_info = im_info.numpy() + outs = generate_proposal_target( + rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info, + self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], + self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], + self.bbox_reg_weights[stage], self.num_classes, self.use_random, + self.is_cls_agnostic, self.is_cascade_rcnn) + + outs = [to_variable(v) for v in outs] + for v in outs: + v.stop_gradient = True + return outs + + +@register +@serializable +class MaskTargetGenerator(object): + __shared__ = ['num_classes'] + + def __init__(self, num_classes=81, resolution=14): + super(MaskTargetGenerator, self).__init__() + self.num_classes = num_classes + self.resolution = resolution + + def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_nums, + labels_int32): + im_info = im_info.numpy() + gt_classes = gt_classes.numpy() + is_crowd = is_crowd.numpy() + gt_segms = gt_segms.numpy() + rois = rois.numpy() + rois_nums = rois_nums.numpy() + labels_int32 = labels_int32.numpy() + outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, + rois, rois_nums, labels_int32, + self.num_classes, self.resolution) + + outs = [to_variable(v) for v in outs] + for v in outs: + v.stop_gradient = True + return outs + + +@register +class RoIExtractor(object): + def __init__(self, + resolution=7, + spatial_scale=1. 
/ 16, + sampling_ratio=0, + extractor_type='RoIPool'): + super(RoIExtractor, self).__init__() + if isinstance(resolution, Integral): + resolution = [resolution, resolution] + self.resolution = resolution + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.extractor_type = extractor_type + + def __call__(self, feat, rois, rois_nums): + cur_l = 0 + new_nums = [cur_l] + rois_nums_np = rois_nums.numpy() + for l in rois_nums_np: + cur_l += l + new_nums.append(cur_l) + nums_t = to_variable(np.asarray(new_nums)) + if self.extractor_type == 'RoIAlign': + rois_feat = fluid.layers.roi_align( + feat, + rois, + self.resolution[0], + self.resolution[1], + self.spatial_scale, + rois_lod=nums_t) + elif self.extractor_type == 'RoIPool': + rois_feat = fluid.layers.roi_pool( + feat, + rois, + self.resolution[0], + self.resolution[1], + self.spatial_scale, + rois_lod=nums_t) + + return rois_feat + + +@register +@serializable +class DecodeClipNms(object): + __shared__ = ['num_classes'] + + def __init__( + self, + num_classes=81, + keep_top_k=100, + score_threshold=0.05, + nms_threshold=0.5, ): + super(DecodeClipNms, self).__init__() + self.num_classes = num_classes + self.keep_top_k = keep_top_k + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + + def __call__(self, bbox, bbox_prob, bbox_delta, img_info): + outs = bbox_post_process(bbox.numpy(), + bbox_prob.numpy(), + bbox_delta.numpy(), + img_info.numpy(), self.keep_top_k, + self.score_threshold, self.nms_threshold, + self.num_classes) + outs = [to_variable(v) for v in outs] + for v in outs: + v.stop_gradient = True + return outs + + +@register +@serializable +class AnchorGrid(object): + """Generate anchor grid + + Args: + image_size (int or list): input image size, may be a single integer or + list of [h, w]. Default: 512 + min_level (int): min level of the feature pyramid. Default: 3 + max_level (int): max level of the feature pyramid. Default: 7 + anchor_base_scale: base anchor scale. Default: 4 + num_scales: number of anchor scales. Default: 3 + aspect_ratios: aspect ratios. 
default: [[1, 1], [1.4, 0.7], [0.7, 1.4]] + """ + + def __init__(self, + image_size=512, + min_level=3, + max_level=7, + anchor_base_scale=4, + num_scales=3, + aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]): + super(AnchorGrid, self).__init__() + if isinstance(image_size, Integral): + self.image_size = [image_size, image_size] + else: + self.image_size = image_size + for dim in self.image_size: + assert dim % 2 ** max_level == 0, \ + "image size should be multiple of the max level stride" + self.min_level = min_level + self.max_level = max_level + self.anchor_base_scale = anchor_base_scale + self.num_scales = num_scales + self.aspect_ratios = aspect_ratios + + @property + def base_cell(self): + if not hasattr(self, '_base_cell'): + self._base_cell = self.make_cell() + return self._base_cell + + def make_cell(self): + scales = [2**(i / self.num_scales) for i in range(self.num_scales)] + scales = np.array(scales) + ratios = np.array(self.aspect_ratios) + ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1) + hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1) + anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs)) + return anchors + + def make_grid(self, stride): + cell = self.base_cell * stride * self.anchor_base_scale + x_steps = np.arange(stride // 2, self.image_size[1], stride) + y_steps = np.arange(stride // 2, self.image_size[0], stride) + offset_x, offset_y = np.meshgrid(x_steps, y_steps) + offset_x = offset_x.flatten() + offset_y = offset_y.flatten() + offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1) + offsets = offsets[:, np.newaxis, :] + return (cell + offsets).reshape(-1, 4) + + def generate(self): + return [ + self.make_grid(2**l) + for l in range(self.min_level, self.max_level + 1) + ] + + def __call__(self): + if not hasattr(self, '_anchor_vars'): + anchor_vars = [] + helper = LayerHelper('anchor_grid') + for idx, l in enumerate(range(self.min_level, self.max_level + 1)): + stride = 2**l + anchors = self.make_grid(stride) + var = helper.create_parameter( + attr=ParamAttr(name='anchors_{}'.format(idx)), + shape=anchors.shape, + dtype='float32', + stop_gradient=True, + default_initializer=NumpyArrayInitializer(anchors)) + anchor_vars.append(var) + var.persistable = True + self._anchor_vars = anchor_vars + + return self._anchor_vars diff --git a/ppdet/utils/data_structure.py b/ppdet/utils/data_structure.py new file mode 100644 index 000000000..b55504172 --- /dev/null +++ b/ppdet/utils/data_structure.py @@ -0,0 +1,45 @@ +import numpy as np + + +class BufferDict(dict): + def __init__(self, **kwargs): + super(BufferDict, self).__init__(**kwargs) + + def __getitem__(self, key): + if key in self.keys(): + return super(BufferDict, self).__getitem__(key) + else: + raise Exception("The %s is not in global inputs dict" % key) + + def __setitem__(self, key, value): + if key not in self.keys(): + super(BufferDict, self).__setitem__(key, value) + else: + raise Exception("The %s is already in global inputs dict" % key) + + def update(self, *args, **kwargs): + for k, v in dict(*args, **kwargs).items(): + self[k] = v + + def get(self, key): + return self.__getitem__(key) + + def set(self, key, value): + self.__setitem__(key, value) + + def debug(self, dshape=True, dtype=False, dvalue=False, name='all'): + if name == 'all': + ditems = self.items() + else: + ditems = self.get(name) + + for k, v in ditems: + info = [k] + if dshape == True and hasattr(v, 'shape'): + info.append(v.shape) + if dtype == True: + info.append(type(v)) + if dvalue == True and hasattr(v, 
'numpy'):
+                info.append(np.mean(np.abs(v.numpy())))
+
+            print(info)
--
GitLab
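A short usage sketch for the BufferDict added above, assuming the ppdet package from this patch is importable; the arrays are invented purely for illustration:

import numpy as np
from ppdet.utils.data_structure import BufferDict  # module added by this patch

gbd = BufferDict()
gbd['image'] = np.zeros((1, 3, 600, 800), dtype='float32')
gbd.set('im_info', np.array([[600., 800., 1.]], dtype='float32'))
print(gbd.get('im_info'))            # get()/set() delegate to __getitem__/__setitem__

try:
    gbd['image'] = np.ones((1,))     # keys are write-once by design
except Exception as e:
    print(e)                         # "The image is already in global inputs dict"

gbd.debug(dshape=True)               # prints [key, shape] for every buffered value

Keys are intentionally write-once, which is what lets the architectures above merge each stage's outputs into the one global buffer without silently overwriting earlier results.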