未验证 提交 988574fe 编写于 作者: W wangguanzhong 提交者: GitHub

add coments for rcnn (#2461)

上级 10d13c43
...@@ -64,7 +64,7 @@ BBoxAssigner: ...@@ -64,7 +64,7 @@ BBoxAssigner:
use_random: True use_random: True
CascadeTwoFCHead: CascadeTwoFCHead:
mlp_dim: 1024 out_channel: 1024
BBoxPostProcess: BBoxPostProcess:
decode: decode:
...@@ -88,7 +88,7 @@ MaskHead: ...@@ -88,7 +88,7 @@ MaskHead:
MaskFeat: MaskFeat:
num_convs: 4 num_convs: 4
out_channels: 256 out_channel: 256
MaskAssigner: MaskAssigner:
mask_resolution: 28 mask_resolution: 28
......
...@@ -62,7 +62,7 @@ BBoxAssigner: ...@@ -62,7 +62,7 @@ BBoxAssigner:
use_random: True use_random: True
CascadeTwoFCHead: CascadeTwoFCHead:
mlp_dim: 1024 out_channel: 1024
BBoxPostProcess: BBoxPostProcess:
decode: decode:
......
...@@ -61,7 +61,7 @@ BBoxAssigner: ...@@ -61,7 +61,7 @@ BBoxAssigner:
use_random: True use_random: True
TwoFCHead: TwoFCHead:
mlp_dim: 1024 out_channel: 1024
BBoxPostProcess: BBoxPostProcess:
......
...@@ -31,7 +31,7 @@ CascadeHead: ...@@ -31,7 +31,7 @@ CascadeHead:
CascadeXConvNormHead: CascadeXConvNormHead:
num_convs: 4 num_convs: 4
mlp_dim: 1024 out_channel: 1024
norm_type: gn norm_type: gn
MaskHead: MaskHead:
...@@ -45,7 +45,7 @@ MaskHead: ...@@ -45,7 +45,7 @@ MaskHead:
MaskFeat: MaskFeat:
num_convs: 4 num_convs: 4
out_channels: 256 out_channel: 256
norm_type: gn norm_type: gn
......
...@@ -21,7 +21,7 @@ CascadeHead: ...@@ -21,7 +21,7 @@ CascadeHead:
CascadeXConvNormHead: CascadeXConvNormHead:
num_convs: 4 num_convs: 4
mlp_dim: 1024 out_channel: 1024
norm_type: gn norm_type: gn
......
...@@ -29,7 +29,7 @@ BBoxHead: ...@@ -29,7 +29,7 @@ BBoxHead:
XConvNormHead: XConvNormHead:
num_convs: 4 num_convs: 4
mlp_dim: 1024 out_channel: 1024
norm_type: gn norm_type: gn
......
...@@ -31,7 +31,7 @@ BBoxHead: ...@@ -31,7 +31,7 @@ BBoxHead:
XConvNormHead: XConvNormHead:
num_convs: 4 num_convs: 4
mlp_dim: 1024 out_channel: 1024
norm_type: gn norm_type: gn
MaskHead: MaskHead:
...@@ -45,7 +45,7 @@ MaskHead: ...@@ -45,7 +45,7 @@ MaskHead:
MaskFeat: MaskFeat:
num_convs: 4 num_convs: 4
out_channels: 256 out_channel: 256
norm_type: gn norm_type: gn
......
...@@ -57,7 +57,7 @@ BBoxAssigner: ...@@ -57,7 +57,7 @@ BBoxAssigner:
use_random: True use_random: True
TwoFCHead: TwoFCHead:
mlp_dim: 1024 out_channel: 1024
BBoxPostProcess: BBoxPostProcess:
decode: RCNNBox decode: RCNNBox
......
...@@ -78,7 +78,7 @@ MaskHead: ...@@ -78,7 +78,7 @@ MaskHead:
MaskFeat: MaskFeat:
num_convs: 0 num_convs: 0
out_channels: 256 out_channel: 256
MaskAssigner: MaskAssigner:
mask_resolution: 14 mask_resolution: 14
......
...@@ -61,7 +61,7 @@ BBoxAssigner: ...@@ -61,7 +61,7 @@ BBoxAssigner:
use_random: True use_random: True
TwoFCHead: TwoFCHead:
mlp_dim: 1024 out_channel: 1024
BBoxPostProcess: BBoxPostProcess:
decode: RCNNBox decode: RCNNBox
...@@ -82,7 +82,7 @@ MaskHead: ...@@ -82,7 +82,7 @@ MaskHead:
MaskFeat: MaskFeat:
num_convs: 4 num_convs: 4
out_channels: 256 out_channel: 256
MaskAssigner: MaskAssigner:
mask_resolution: 28 mask_resolution: 28
......
...@@ -31,31 +31,40 @@ __all__ = ['TwoFCHead', 'XConvNormHead', 'BBoxHead'] ...@@ -31,31 +31,40 @@ __all__ = ['TwoFCHead', 'XConvNormHead', 'BBoxHead']
@register @register
class TwoFCHead(nn.Layer): class TwoFCHead(nn.Layer):
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7): """
RCNN bbox head with Two fc layers to extract feature
Args:
in_channel (int): Input channel which can be derived by from_config
out_channel (int): Output channel
resolution (int): Resolution of input feature map, default 7
"""
def __init__(self, in_channel=256, out_channel=1024, resolution=7):
super(TwoFCHead, self).__init__() super(TwoFCHead, self).__init__()
self.in_dim = in_dim self.in_channel = in_channel
self.mlp_dim = mlp_dim self.out_channel = out_channel
fan = in_dim * resolution * resolution fan = in_channel * resolution * resolution
self.fc6 = nn.Linear( self.fc6 = nn.Linear(
in_dim * resolution * resolution, in_channel * resolution * resolution,
mlp_dim, out_channel,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=XavierUniform(fan_out=fan))) initializer=XavierUniform(fan_out=fan)))
self.fc7 = nn.Linear( self.fc7 = nn.Linear(
mlp_dim, out_channel,
mlp_dim, out_channel,
weight_attr=paddle.ParamAttr(initializer=XavierUniform())) weight_attr=paddle.ParamAttr(initializer=XavierUniform()))
@classmethod @classmethod
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
s = input_shape s = input_shape
s = s[0] if isinstance(s, (list, tuple)) else s s = s[0] if isinstance(s, (list, tuple)) else s
return {'in_dim': s.channels} return {'in_channel': s.channels}
@property @property
def out_shape(self): def out_shape(self):
return [ShapeSpec(channels=self.mlp_dim, )] return [ShapeSpec(channels=self.out_channel, )]
def forward(self, rois_feat): def forward(self, rois_feat):
rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1) rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
...@@ -68,34 +77,36 @@ class TwoFCHead(nn.Layer): ...@@ -68,34 +77,36 @@ class TwoFCHead(nn.Layer):
@register @register
class XConvNormHead(nn.Layer): class XConvNormHead(nn.Layer):
__shared__ = ['norm_type', 'freeze_norm']
""" """
RCNN bbox head with several convolution layers RCNN bbox head with several convolution layers
Args: Args:
in_dim(int): num of channels for the input rois_feat in_channel (int): Input channels which can be derived by from_config
num_convs(int): num of convolution layers for the rcnn bbox head num_convs (int): The number of conv layers
conv_dim(int): num of channels for the conv layers conv_dim (int): The number of channels for the conv layers
mlp_dim(int): num of channels for the fc layers out_channel (int): Output channels
resolution(int): resolution of the rois_feat resolution (int): Resolution of input feature map
norm_type(str): norm type, 'gn' by defalut norm_type (string): Norm type, bn, gn, sync_bn are available,
freeze_norm(bool): whether to freeze the norm default `gn`
stage_name(str): used in CascadeXConvNormHead, '' by default freeze_norm (bool): Whether to freeze the norm
stage_name (string): Prefix name for conv layer, '' by default
""" """
__shared__ = ['norm_type', 'freeze_norm']
def __init__(self, def __init__(self,
in_dim=256, in_channel=256,
num_convs=4, num_convs=4,
conv_dim=256, conv_dim=256,
mlp_dim=1024, out_channel=1024,
resolution=7, resolution=7,
norm_type='gn', norm_type='gn',
freeze_norm=False, freeze_norm=False,
stage_name=''): stage_name=''):
super(XConvNormHead, self).__init__() super(XConvNormHead, self).__init__()
self.in_dim = in_dim self.in_channel = in_channel
self.num_convs = num_convs self.num_convs = num_convs
self.conv_dim = conv_dim self.conv_dim = conv_dim
self.mlp_dim = mlp_dim self.out_channel = out_channel
self.norm_type = norm_type self.norm_type = norm_type
self.freeze_norm = freeze_norm self.freeze_norm = freeze_norm
...@@ -103,7 +114,7 @@ class XConvNormHead(nn.Layer): ...@@ -103,7 +114,7 @@ class XConvNormHead(nn.Layer):
fan = conv_dim * 3 * 3 fan = conv_dim * 3 * 3
initializer = KaimingNormal(fan_in=fan) initializer = KaimingNormal(fan_in=fan)
for i in range(self.num_convs): for i in range(self.num_convs):
in_c = in_dim if i == 0 else conv_dim in_c = in_channel if i == 0 else conv_dim
head_conv_name = stage_name + 'bbox_head_conv{}'.format(i) head_conv_name = stage_name + 'bbox_head_conv{}'.format(i)
head_conv = self.add_sublayer( head_conv = self.add_sublayer(
head_conv_name, head_conv_name,
...@@ -122,7 +133,7 @@ class XConvNormHead(nn.Layer): ...@@ -122,7 +133,7 @@ class XConvNormHead(nn.Layer):
fan = conv_dim * resolution * resolution fan = conv_dim * resolution * resolution
self.fc6 = nn.Linear( self.fc6 = nn.Linear(
conv_dim * resolution * resolution, conv_dim * resolution * resolution,
mlp_dim, out_channel,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=XavierUniform(fan_out=fan)), initializer=XavierUniform(fan_out=fan)),
bias_attr=paddle.ParamAttr( bias_attr=paddle.ParamAttr(
...@@ -132,11 +143,11 @@ class XConvNormHead(nn.Layer): ...@@ -132,11 +143,11 @@ class XConvNormHead(nn.Layer):
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
s = input_shape s = input_shape
s = s[0] if isinstance(s, (list, tuple)) else s s = s[0] if isinstance(s, (list, tuple)) else s
return {'in_dim': s.channels} return {'in_channel': s.channels}
@property @property
def out_shape(self): def out_shape(self):
return [ShapeSpec(channels=self.mlp_dim, )] return [ShapeSpec(channels=self.out_channel, )]
def forward(self, rois_feat): def forward(self, rois_feat):
for i in range(self.num_convs): for i in range(self.num_convs):
...@@ -151,14 +162,17 @@ class BBoxHead(nn.Layer): ...@@ -151,14 +162,17 @@ class BBoxHead(nn.Layer):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
__inject__ = ['bbox_assigner'] __inject__ = ['bbox_assigner']
""" """
head (nn.Layer): Extract feature in bbox head RCNN bbox head
in_channel (int): Input channel after RoI extractor
roi_extractor (object): The module of RoI Extractor Args:
bbox_assigner (object): The module of Box Assigner, label and sample the head (nn.Layer): Extract feature in bbox head
box. in_channel (int): Input channel after RoI extractor
with_pool (bool): Whether to use pooling for the RoI feature. roi_extractor (object): The module of RoI Extractor
num_classes (int): The number of classes bbox_assigner (object): The module of Box Assigner, label and sample the
bbox_weight (List[float]): The weight to get the decode box box.
with_pool (bool): Whether to use pooling for the RoI feature.
num_classes (int): The number of classes
bbox_weight (List[float]): The weight to get the decode box
""" """
def __init__(self, def __init__(self,
......
...@@ -32,32 +32,41 @@ __all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead'] ...@@ -32,32 +32,41 @@ __all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead']
@register @register
class CascadeTwoFCHead(nn.Layer): class CascadeTwoFCHead(nn.Layer):
__shared__ = ['num_cascade_stage'] __shared__ = ['num_cascade_stage']
"""
Cascade RCNN bbox head with Two fc layers to extract feature
Args:
in_channel (int): Input channel which can be derived by from_config
out_channel (int): Output channel
resolution (int): Resolution of input feature map, default 7
num_cascade_stage (int): The number of cascade stage, default 3
"""
def __init__(self, def __init__(self,
in_dim=256, in_channel=256,
mlp_dim=1024, out_channel=1024,
resolution=7, resolution=7,
num_cascade_stage=3): num_cascade_stage=3):
super(CascadeTwoFCHead, self).__init__() super(CascadeTwoFCHead, self).__init__()
self.in_dim = in_dim self.in_channel = in_channel
self.mlp_dim = mlp_dim self.out_channel = out_channel
self.head_list = [] self.head_list = []
for stage in range(num_cascade_stage): for stage in range(num_cascade_stage):
head_per_stage = self.add_sublayer( head_per_stage = self.add_sublayer(
str(stage), TwoFCHead(in_dim, mlp_dim, resolution)) str(stage), TwoFCHead(in_channel, out_channel, resolution))
self.head_list.append(head_per_stage) self.head_list.append(head_per_stage)
@classmethod @classmethod
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
s = input_shape s = input_shape
s = s[0] if isinstance(s, (list, tuple)) else s s = s[0] if isinstance(s, (list, tuple)) else s
return {'in_dim': s.channels} return {'in_channel': s.channels}
@property @property
def out_shape(self): def out_shape(self):
return [ShapeSpec(channels=self.mlp_dim, )] return [ShapeSpec(channels=self.out_channel, )]
def forward(self, rois_feat, stage=0): def forward(self, rois_feat, stage=0):
out = self.head_list[stage](rois_feat) out = self.head_list[stage](rois_feat)
...@@ -67,29 +76,43 @@ class CascadeTwoFCHead(nn.Layer): ...@@ -67,29 +76,43 @@ class CascadeTwoFCHead(nn.Layer):
@register @register
class CascadeXConvNormHead(nn.Layer): class CascadeXConvNormHead(nn.Layer):
__shared__ = ['norm_type', 'freeze_norm', 'num_cascade_stage'] __shared__ = ['norm_type', 'freeze_norm', 'num_cascade_stage']
"""
Cascade RCNN bbox head with several convolution layers
Args:
in_channel (int): Input channels which can be derived by from_config
num_convs (int): The number of conv layers
conv_dim (int): The number of channels for the conv layers
out_channel (int): Output channels
resolution (int): Resolution of input feature map
norm_type (string): Norm type, bn, gn, sync_bn are available,
default `gn`
freeze_norm (bool): Whether to freeze the norm
num_cascade_stage (int): The number of cascade stage, default 3
"""
def __init__(self, def __init__(self,
in_dim=256, in_channel=256,
num_convs=4, num_convs=4,
conv_dim=256, conv_dim=256,
mlp_dim=1024, out_channel=1024,
resolution=7, resolution=7,
norm_type='gn', norm_type='gn',
freeze_norm=False, freeze_norm=False,
num_cascade_stage=3): num_cascade_stage=3):
super(CascadeXConvNormHead, self).__init__() super(CascadeXConvNormHead, self).__init__()
self.in_dim = in_dim self.in_channel = in_channel
self.mlp_dim = mlp_dim self.out_channel = out_channel
self.head_list = [] self.head_list = []
for stage in range(num_cascade_stage): for stage in range(num_cascade_stage):
head_per_stage = self.add_sublayer( head_per_stage = self.add_sublayer(
str(stage), str(stage),
XConvNormHead( XConvNormHead(
in_dim, in_channel,
num_convs, num_convs,
conv_dim, conv_dim,
mlp_dim, out_channel,
resolution, resolution,
norm_type, norm_type,
freeze_norm, freeze_norm,
...@@ -100,11 +123,11 @@ class CascadeXConvNormHead(nn.Layer): ...@@ -100,11 +123,11 @@ class CascadeXConvNormHead(nn.Layer):
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
s = input_shape s = input_shape
s = s[0] if isinstance(s, (list, tuple)) else s s = s[0] if isinstance(s, (list, tuple)) else s
return {'in_dim': s.channels} return {'in_channel': s.channels}
@property @property
def out_shape(self): def out_shape(self):
return [ShapeSpec(channels=self.mlp_dim, )] return [ShapeSpec(channels=self.out_channel, )]
def forward(self, rois_feat, stage=0): def forward(self, rois_feat, stage=0):
out = self.head_list[stage](rois_feat) out = self.head_list[stage](rois_feat)
...@@ -116,16 +139,18 @@ class CascadeHead(BBoxHead): ...@@ -116,16 +139,18 @@ class CascadeHead(BBoxHead):
__shared__ = ['num_classes', 'num_cascade_stages'] __shared__ = ['num_classes', 'num_cascade_stages']
__inject__ = ['bbox_assigner'] __inject__ = ['bbox_assigner']
""" """
head (nn.Layer): Extract feature in bbox head Cascade RCNN bbox head
in_channel (int): Input channel after RoI extractor
roi_extractor (object): The module of RoI Extractor Args:
bbox_assigner (object): The module of Box Assigner, label and sample the head (nn.Layer): Extract feature in bbox head
box. in_channel (int): Input channel after RoI extractor
num_classes (int): The number of classes roi_extractor (object): The module of RoI Extractor
bbox_weight (List[List[float]]): The weight to get the decode box and the bbox_assigner (object): The module of Box Assigner, label and sample the
length of weight is the number of cascade box.
stage num_classes (int): The number of classes
num_cascade_stages (int): THe number of stage to refine the box bbox_weight (List[List[float]]): The weight to get the decode box and the
length of weight is the number of cascade stage
num_cascade_stages (int): The number of stages to refine the box
""" """
def __init__(self, def __init__(self,
......
...@@ -27,18 +27,29 @@ from .roi_extractor import RoIAlign ...@@ -27,18 +27,29 @@ from .roi_extractor import RoIAlign
@register @register
class MaskFeat(nn.Layer): class MaskFeat(nn.Layer):
"""
Feature extraction in Mask head
Args:
in_channel (int): Input channels
out_channel (int): Output channels
num_convs (int): The number of conv layers, default 4
norm_type (string | None): Norm type, bn, gn, sync_bn are available,
default None
"""
def __init__(self, def __init__(self,
in_channel=256,
out_channel=256,
num_convs=4, num_convs=4,
in_channels=256,
out_channels=256,
norm_type=None): norm_type=None):
super(MaskFeat, self).__init__() super(MaskFeat, self).__init__()
self.num_convs = num_convs self.num_convs = num_convs
self.in_channels = in_channels self.in_channel = in_channel
self.out_channels = out_channels self.out_channel = out_channel
self.norm_type = norm_type self.norm_type = norm_type
fan_conv = out_channels * 3 * 3 fan_conv = out_channel * 3 * 3
fan_deconv = out_channels * 2 * 2 fan_deconv = out_channel * 2 * 2
mask_conv = nn.Sequential() mask_conv = nn.Sequential()
if norm_type == 'gn': if norm_type == 'gn':
...@@ -47,8 +58,8 @@ class MaskFeat(nn.Layer): ...@@ -47,8 +58,8 @@ class MaskFeat(nn.Layer):
mask_conv.add_sublayer( mask_conv.add_sublayer(
conv_name, conv_name,
ConvNormLayer( ConvNormLayer(
ch_in=in_channels if i == 0 else out_channels, ch_in=in_channel if i == 0 else out_channel,
ch_out=out_channels, ch_out=out_channel,
filter_size=3, filter_size=3,
stride=1, stride=1,
norm_type=self.norm_type, norm_type=self.norm_type,
...@@ -62,8 +73,8 @@ class MaskFeat(nn.Layer): ...@@ -62,8 +73,8 @@ class MaskFeat(nn.Layer):
mask_conv.add_sublayer( mask_conv.add_sublayer(
conv_name, conv_name,
nn.Conv2D( nn.Conv2D(
in_channels=in_channels if i == 0 else out_channels, in_channels=in_channel if i == 0 else out_channel,
out_channels=out_channels, out_channels=out_channel,
kernel_size=3, kernel_size=3,
padding=1, padding=1,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
...@@ -72,8 +83,8 @@ class MaskFeat(nn.Layer): ...@@ -72,8 +83,8 @@ class MaskFeat(nn.Layer):
mask_conv.add_sublayer( mask_conv.add_sublayer(
'conv5_mask', 'conv5_mask',
nn.Conv2DTranspose( nn.Conv2DTranspose(
in_channels=self.in_channels, in_channels=self.in_channel,
out_channels=self.out_channels, out_channels=self.out_channel,
kernel_size=2, kernel_size=2,
stride=2, stride=2,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
...@@ -85,10 +96,10 @@ class MaskFeat(nn.Layer): ...@@ -85,10 +96,10 @@ class MaskFeat(nn.Layer):
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
if isinstance(input_shape, (list, tuple)): if isinstance(input_shape, (list, tuple)):
input_shape = input_shape[0] input_shape = input_shape[0]
return {'in_channels': input_shape.channels, } return {'in_channel': input_shape.channels, }
def out_channel(self): def out_channels(self):
return self.out_channels return self.out_channel
def forward(self, feats): def forward(self, feats):
return self.upsample(feats) return self.upsample(feats)
...@@ -98,6 +109,18 @@ class MaskFeat(nn.Layer): ...@@ -98,6 +109,18 @@ class MaskFeat(nn.Layer):
class MaskHead(nn.Layer): class MaskHead(nn.Layer):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
__inject__ = ['mask_assigner'] __inject__ = ['mask_assigner']
"""
RCNN mask head
Args:
head (nn.Layer): Extract feature in mask head
roi_extractor (object): The module of RoI Extractor
mask_assigner (object): The module of Mask Assigner,
label and sample the mask
num_classes (int): The number of classes
share_bbox_feat (bool): Whether to share the feature from bbox head,
default false
"""
def __init__(self, def __init__(self,
head, head,
...@@ -112,7 +135,7 @@ class MaskHead(nn.Layer): ...@@ -112,7 +135,7 @@ class MaskHead(nn.Layer):
if isinstance(roi_extractor, dict): if isinstance(roi_extractor, dict):
self.roi_extractor = RoIAlign(**roi_extractor) self.roi_extractor = RoIAlign(**roi_extractor)
self.head = head self.head = head
self.in_channels = head.out_channel() self.in_channels = head.out_channels()
self.mask_assigner = mask_assigner self.mask_assigner = mask_assigner
self.share_bbox_feat = share_bbox_feat self.share_bbox_feat = share_bbox_feat
self.bbox_head = None self.bbox_head = None
...@@ -159,7 +182,6 @@ class MaskHead(nn.Layer): ...@@ -159,7 +182,6 @@ class MaskHead(nn.Layer):
rois_num (Tensor): The number of proposals for each batch rois_num (Tensor): The number of proposals for each batch
inputs (dict): ground truth info inputs (dict): ground truth info
""" """
#assert self.bbox_head
tgt_labels, _, tgt_gt_inds = targets tgt_labels, _, tgt_gt_inds = targets
rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner( rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner(
rois, tgt_labels, tgt_gt_inds, inputs) rois, tgt_labels, tgt_gt_inds, inputs)
......
...@@ -25,6 +25,31 @@ def _to_list(v): ...@@ -25,6 +25,31 @@ def _to_list(v):
@register @register
class RoIAlign(object): class RoIAlign(object):
"""
RoI Align module
For more details, please refer to the document of roi_align
in ppdet/modeling/ops.py
Args:
resolution (int): The output size, default 14
spatial_scale (float): Multiplicative spatial scale factor to translate
ROI coords from their input scale to the scale used when pooling.
default 0.0625
sampling_ratio (int): The number of sampling points in the interpolation
grid, default 0
canconical_level (int): The referring level of FPN layer with
specified level. default 4
canonical_size (int): The referring scale of FPN layer with
specified scale. default 224
start_level (int): The start level of FPN layer to extract RoI feature,
default 0
end_level (int): The end level of FPN layer to extract RoI feature,
default 3
aligned (bool): Whether to add offset to rois' coord in roi_align.
default false
"""
def __init__(self, def __init__(self,
resolution=14, resolution=14,
spatial_scale=0.0625, spatial_scale=0.0625,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay
from paddle.nn import Conv2D
from ppdet.core.workspace import register
from ppdet.modeling import ops
@register
class RPNFeat(nn.Layer):
    """Feature extraction step of the RPN: a 3x3 convolution followed by ReLU.

    A single convolution layer is created and applied to every feature map
    handed to ``forward``.

    Args:
        feat_in (int): Number of input channels of the convolution.
        feat_out (int): Number of output channels of the convolution.
    """

    def __init__(self, feat_in=1024, feat_out=1024):
        super(RPNFeat, self).__init__()
        conv_weight = ParamAttr(initializer=Normal(mean=0., std=0.01))
        conv_bias = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
        # One convolution only: it is reused for each level in forward().
        self.rpn_conv = Conv2D(
            in_channels=feat_in,
            out_channels=feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=conv_weight,
            bias_attr=conv_bias)

    def forward(self, inputs, feats):
        """Apply the shared convolution and ReLU to each feature map.

        Args:
            inputs: Not used by this method; accepted so the call signature
                matches how RPNHead invokes it.
            feats: Iterable of feature maps.

        Returns:
            list: One activated feature map per element of ``feats``, in the
                same order.
        """
        return [F.relu(self.rpn_conv(level_feat)) for level_feat in feats]
@register
class RPNHead(nn.Layer):
    """RPN head producing per-level classification scores and box deltas.

    Args:
        rpn_feat (nn.Layer | dict): Feature extractor applied before the
            prediction convolutions. A dict is expanded as keyword arguments
            to build an ``RPNFeat``.
        anchor_per_position (int): Output channels of the score convolution;
            the delta convolution outputs four times as many.
        rpn_channel (int): Input channels of both prediction convolutions.
    """
    __inject__ = ['rpn_feat']

    def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024):
        super(RPNHead, self).__init__()
        if isinstance(rpn_feat, dict):
            self.rpn_feat = RPNFeat(**rpn_feat)
        else:
            self.rpn_feat = rpn_feat

        def build_pred_conv(num_outputs):
            # Both prediction layers are 1x1 convolutions that differ only in
            # their number of output channels.
            return Conv2D(
                in_channels=rpn_channel,
                out_channels=num_outputs,
                kernel_size=1,
                padding=0,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))

        # The head is shared by all levels: one layer of each kind.
        # Classification scores of the rpn rois.
        self.rpn_rois_score = build_pred_conv(anchor_per_position)
        # Bbox regression deltas of the rpn rois.
        self.rpn_rois_delta = build_pred_conv(4 * anchor_per_position)

    def forward(self, inputs, feats):
        """Run the feature extractor, then both prediction layers per level.

        Args:
            inputs: Forwarded unchanged to ``self.rpn_feat``.
            feats: Feature maps, forwarded to ``self.rpn_feat``.

        Returns:
            tuple: ``(rpn_feats, rpn_head_out)`` where ``rpn_head_out`` is a
                list of ``(scores, deltas)`` pairs, one per entry of
                ``rpn_feats``.
        """
        rpn_feats = self.rpn_feat(inputs, feats)
        rpn_head_out = [(self.rpn_rois_score(level_feat),
                         self.rpn_rois_delta(level_feat))
                        for level_feat in rpn_feats]
        return rpn_feats, rpn_head_out

    def get_loss(self, loss_inputs):
        """Compute the RPN classification and regression losses.

        Args:
            loss_inputs (dict): Must provide the keys ``rpn_score_target``,
                ``rpn_score_pred``, ``rpn_rois_target``, ``rpn_rois_pred``
                and ``rpn_rois_weight``.

        Returns:
            dict: ``{'loss_rpn_cls': ..., 'loss_rpn_reg': ...}``.
        """
        # Classification: mean sigmoid cross entropy against float targets.
        cls_target = paddle.cast(
            x=loss_inputs['rpn_score_target'], dtype='float32')
        cls_target.stop_gradient = True
        cls_loss = paddle.mean(
            ops.sigmoid_cross_entropy_with_logits(
                input=loss_inputs['rpn_score_pred'], label=cls_target),
            name='loss_rpn_cls')

        # Regression: summed smooth-L1; the same weight tensor is passed as
        # both the inside and the outside weight.
        reg_target = paddle.cast(
            x=loss_inputs['rpn_rois_target'], dtype='float32')
        reg_target.stop_gradient = True
        reg_loss = paddle.sum(
            ops.smooth_l1(
                input=loss_inputs['rpn_rois_pred'],
                label=reg_target,
                inside_weight=loss_inputs['rpn_rois_weight'],
                outside_weight=loss_inputs['rpn_rois_weight'],
                sigma=3.0, ))

        # Normalize by the product of the score target's dimensions.
        num_scores = paddle.prod(
            paddle.cast(paddle.shape(cls_target), dtype='float32'))
        num_scores.stop_gradient = True

        return {
            'loss_rpn_cls': cls_loss,
            'loss_rpn_reg': reg_loss / num_scores
        }
...@@ -29,6 +29,34 @@ __all__ = ['FPN'] ...@@ -29,6 +29,34 @@ __all__ = ['FPN']
@register @register
@serializable @serializable
class FPN(nn.Layer): class FPN(nn.Layer):
"""
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
Args:
in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config
out_channel (int): output channel of each level
spatial_scales (list[float]): the spatial scales between input feature
maps and original input image which can be derived from the output
shape of backbone by from_config
has_extra_convs (bool): whether to add extra conv to the last level.
default False
extra_stage (int): the number of extra stages added to the last level.
default 1
use_c5 (bool): Whether to use c5 as the input of extra stage,
otherwise p5 is used. default True
norm_type (string|None): The normalization type in FPN module. If
norm_type is None, norm will not be used after conv and if
norm_type is string, bn, gn, sync_bn are available. default None
norm_decay (float): weight decay for normalization layer weights.
default 0.
freeze_norm (bool): whether to freeze normalization layer.
default False
relu_before_extra_convs (bool): whether to add relu before extra convs.
default False
"""
def __init__(self, def __init__(self,
in_channels, in_channels,
out_channel, out_channel,
...@@ -67,7 +95,7 @@ class FPN(nn.Layer): ...@@ -67,7 +95,7 @@ class FPN(nn.Layer):
else: else:
lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2) lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
in_c = in_channels[i - st_stage] in_c = in_channels[i - st_stage]
if self.norm_type == 'gn': if self.norm_type is not None:
lateral = self.add_sublayer( lateral = self.add_sublayer(
lateral_name, lateral_name,
ConvNormLayer( ConvNormLayer(
...@@ -93,7 +121,7 @@ class FPN(nn.Layer): ...@@ -93,7 +121,7 @@ class FPN(nn.Layer):
self.lateral_convs.append(lateral) self.lateral_convs.append(lateral)
fpn_name = 'fpn_res{}_sum'.format(i + 2) fpn_name = 'fpn_res{}_sum'.format(i + 2)
if self.norm_type == 'gn': if self.norm_type is not None:
fpn_conv = self.add_sublayer( fpn_conv = self.add_sublayer(
fpn_name, fpn_name,
ConvNormLayer( ConvNormLayer(
...@@ -128,7 +156,7 @@ class FPN(nn.Layer): ...@@ -128,7 +156,7 @@ class FPN(nn.Layer):
else: else:
in_c = out_channel in_c = out_channel
extra_fpn_name = 'fpn_{}'.format(lvl + 2) extra_fpn_name = 'fpn_{}'.format(lvl + 2)
if self.norm_type == 'gn': if self.norm_type is not None:
extra_fpn_conv = self.add_sublayer( extra_fpn_conv = self.add_sublayer(
extra_fpn_name, extra_fpn_name,
ConvNormLayer( ConvNormLayer(
......
...@@ -25,6 +25,24 @@ from .. import ops ...@@ -25,6 +25,24 @@ from .. import ops
@register @register
class AnchorGenerator(nn.Layer): class AnchorGenerator(nn.Layer):
"""
Generate anchors according to the feature maps
Args:
anchor_sizes (list[float] | list[list[float]]): The anchor sizes at
each feature point. list[float] means all feature levels share the
same sizes. list[list[float]] means the anchor sizes for
each level. The sizes stand for the scale of input size.
aspect_ratios (list[float] | list[list[float]]): The aspect ratios at
each feature point. list[float] means all feature levels share the
same ratios. list[list[float]] means the aspect ratios for
each level.
strides (list[float]): The strides of feature maps which generate
anchors
offset (float): The offset of the coordinate of anchors, default 0.
"""
def __init__(self, def __init__(self,
anchor_sizes=[32, 64, 128, 256, 512], anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0], aspect_ratios=[0.5, 1.0, 2.0],
......
...@@ -25,6 +25,28 @@ from .. import ops ...@@ -25,6 +25,28 @@ from .. import ops
@register @register
@serializable @serializable
class ProposalGenerator(object): class ProposalGenerator(object):
"""
Proposal generation module
For more details, please refer to the document of generate_proposals
in ppdet/modeling/ops.py
Args:
pre_nms_top_n (int): Number of total bboxes to be kept per
image before NMS. default 12000
post_nms_top_n (int): Number of total bboxes to be kept per
image after NMS. default 2000
nms_thresh (float): Threshold in NMS. default 0.5
min_size (float): Remove predicted boxes with either height or
width < min_size. default 0.1
eta (float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
`adaptive_threshold = adaptive_threshold * eta` in each iteration.
default 1.
topk_after_collect (bool): whether to adopt topk after batch
collection. If topk_after_collect is true, box filter will not be
used after NMS at each image in proposal generation. default false
"""
def __init__(self, def __init__(self,
pre_nms_top_n=12000, pre_nms_top_n=12000,
post_nms_top_n=2000, post_nms_top_n=2000,
......
...@@ -27,12 +27,20 @@ from .proposal_generator import ProposalGenerator ...@@ -27,12 +27,20 @@ from .proposal_generator import ProposalGenerator
class RPNFeat(nn.Layer): class RPNFeat(nn.Layer):
def __init__(self, feat_in=1024, feat_out=1024): """
Feature extraction in RPN head
Args:
in_channel (int): Input channel
out_channel (int): Output channel
"""
def __init__(self, in_channel=1024, out_channel=1024):
super(RPNFeat, self).__init__() super(RPNFeat, self).__init__()
# rpn feat is shared with each level # rpn feat is shared with each level
self.rpn_conv = nn.Conv2D( self.rpn_conv = nn.Conv2D(
in_channels=feat_in, in_channels=in_channel,
out_channels=feat_out, out_channels=out_channel,
kernel_size=3, kernel_size=3,
padding=1, padding=1,
weight_attr=paddle.ParamAttr(initializer=Normal( weight_attr=paddle.ParamAttr(initializer=Normal(
...@@ -47,6 +55,20 @@ class RPNFeat(nn.Layer): ...@@ -47,6 +55,20 @@ class RPNFeat(nn.Layer):
@register @register
class RPNHead(nn.Layer): class RPNHead(nn.Layer):
"""
Region Proposal Network
Args:
anchor_generator (dict): configure of anchor generation
rpn_target_assign (dict): configure of rpn targets assignment
train_proposal (dict): configure of proposals generation
at the stage of training
test_proposal (dict): configure of proposals generation
at the stage of prediction
in_channel (int): channel of input feature maps which can be
derived by from_config
"""
def __init__(self, def __init__(self,
anchor_generator=AnchorGenerator().__dict__, anchor_generator=AnchorGenerator().__dict__,
rpn_target_assign=RPNTargetAssign().__dict__, rpn_target_assign=RPNTargetAssign().__dict__,
......
...@@ -135,12 +135,15 @@ def generate_proposal_target(rpn_rois, ...@@ -135,12 +135,15 @@ def generate_proposal_target(rpn_rois,
tgt_gt_inds = [] tgt_gt_inds = []
new_rois_num = [] new_rois_num = []
# In Cascade R-CNN, both the foreground and background thresholds
# are taken from cascade_iou
fg_thresh = cascade_iou if is_cascade else fg_thresh fg_thresh = cascade_iou if is_cascade else fg_thresh
bg_thresh = cascade_iou if is_cascade else bg_thresh bg_thresh = cascade_iou if is_cascade else bg_thresh
for i, rpn_roi in enumerate(rpn_rois): for i, rpn_roi in enumerate(rpn_rois):
gt_bbox = gt_boxes[i] gt_bbox = gt_boxes[i]
gt_class = paddle.squeeze(gt_classes[i], axis=-1) gt_class = paddle.squeeze(gt_classes[i], axis=-1)
# Concat RoIs and gt boxes except cascade rcnn
if not is_cascade: if not is_cascade:
bbox = paddle.concat([rpn_roi, gt_bbox]) bbox = paddle.concat([rpn_roi, gt_bbox])
else: else:
...@@ -247,10 +250,12 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds, ...@@ -247,10 +250,12 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
tgt_weights = [] tgt_weights = []
for k in range(len(rois)): for k in range(len(rois)):
labels_per_im = labels_int32[k] labels_per_im = labels_int32[k]
# select rois labeled with foreground
fg_inds = paddle.nonzero( fg_inds = paddle.nonzero(
paddle.logical_and(labels_per_im != -1, labels_per_im != paddle.logical_and(labels_per_im != -1, labels_per_im !=
num_classes)) num_classes))
has_fg = True has_fg = True
# generate fake roi if foreground is empty
if fg_inds.numel() == 0: if fg_inds.numel() == 0:
has_fg = False has_fg = False
fg_inds = paddle.ones([1], dtype='int32') fg_inds = paddle.ones([1], dtype='int32')
...@@ -259,6 +264,8 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds, ...@@ -259,6 +264,8 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
rois_per_im = rois[k] rois_per_im = rois[k]
fg_rois = paddle.gather(rois_per_im, fg_inds) fg_rois = paddle.gather(rois_per_im, fg_inds)
# Copy the foreground roi to cpu
# to generate mask target with ground-truth
boxes = fg_rois.numpy() boxes = fg_rois.numpy()
gt_segms_per_im = gt_segms[k] gt_segms_per_im = gt_segms[k]
new_segm = [] new_segm = []
......
...@@ -22,6 +22,32 @@ from .target import rpn_anchor_target, generate_proposal_target, generate_mask_t ...@@ -22,6 +22,32 @@ from .target import rpn_anchor_target, generate_proposal_target, generate_mask_t
@register @register
@serializable @serializable
class RPNTargetAssign(object): class RPNTargetAssign(object):
"""
RPN targets assignment module
The assignment consists of three steps:
1. Match anchor and ground-truth box, label the anchor with foreground
or background sample
2. Sample anchors to keep the proper ratio between foreground and
background
3. Generate the targets for classification and regression branch
Args:
batch_size_per_im (int): Total number of RPN samples per image.
default 256
fg_fraction (float): Fraction of anchors that is labeled
foreground, default 0.5
positive_overlap (float): Minimum overlap required between an anchor
and ground-truth box for the (anchor, gt box) pair to be
a foreground sample. default 0.7
negative_overlap (float): Maximum overlap allowed between an anchor
and ground-truth box for the (anchor, gt box) pair to be
a background sample. default 0.3
use_random (bool): Use random sampling to choose foreground and
background boxes, default true.
"""
def __init__(self, def __init__(self,
batch_size_per_im=256, batch_size_per_im=256,
fg_fraction=0.5, fg_fraction=0.5,
...@@ -54,6 +80,33 @@ class RPNTargetAssign(object): ...@@ -54,6 +80,33 @@ class RPNTargetAssign(object):
@register @register
class BBoxAssigner(object): class BBoxAssigner(object):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
"""
RCNN targets assignment module
The assignment consists of three steps:
1. Match RoIs and ground-truth box, label the RoIs with foreground
or background sample
2. Sample RoIs to keep the proper ratio between foreground and
background
3. Generate the targets for classification and regression branch
Args:
batch_size_per_im (int): Total number of RoIs per image.
default 512
fg_fraction (float): Fraction of RoIs that is labeled
foreground, default 0.25
fg_thresh (float): Minimum overlap required between a RoI
and ground-truth box for the (roi, gt box) pair to be
a foreground sample. default 0.5
bg_thresh (float): Maximum overlap allowed between a RoI
and ground-truth box for the (roi, gt box) pair to be
a background sample. default 0.5
use_random (bool): Use random sampling to choose foreground and
background boxes, default true
cascade_iou (list[float]): The list of overlap thresholds used to select
foreground and background at each stage, which is only used in Cascade RCNN.
num_classes (int): The number of classes.
"""
def __init__(self, def __init__(self,
batch_size_per_im=512, batch_size_per_im=512,
...@@ -61,7 +114,6 @@ class BBoxAssigner(object): ...@@ -61,7 +114,6 @@ class BBoxAssigner(object):
fg_thresh=.5, fg_thresh=.5,
bg_thresh=.5, bg_thresh=.5,
use_random=True, use_random=True,
is_cls_agnostic=False,
cascade_iou=[0.5, 0.6, 0.7], cascade_iou=[0.5, 0.6, 0.7],
num_classes=80): num_classes=80):
super(BBoxAssigner, self).__init__() super(BBoxAssigner, self).__init__()
...@@ -70,7 +122,6 @@ class BBoxAssigner(object): ...@@ -70,7 +122,6 @@ class BBoxAssigner(object):
self.fg_thresh = fg_thresh self.fg_thresh = fg_thresh
self.bg_thresh = bg_thresh self.bg_thresh = bg_thresh
self.use_random = use_random self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic
self.cascade_iou = cascade_iou self.cascade_iou = cascade_iou
self.num_classes = num_classes self.num_classes = num_classes
...@@ -99,6 +150,18 @@ class BBoxAssigner(object): ...@@ -99,6 +150,18 @@ class BBoxAssigner(object):
@serializable @serializable
class MaskAssigner(object): class MaskAssigner(object):
__shared__ = ['num_classes', 'mask_resolution'] __shared__ = ['num_classes', 'mask_resolution']
"""
Mask targets assignment module
The assignment consists of two steps:
1. Select RoIs labeled as foreground.
2. Encode the RoIs and corresponding gt polygons to generate
mask target
Args:
num_classes (int): The number of classes
mask_resolution (int): The resolution of mask target, default 14
"""
def __init__(self, num_classes=80, mask_resolution=14): def __init__(self, num_classes=80, mask_resolution=14):
super(MaskAssigner, self).__init__() super(MaskAssigner, self).__init__()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册