# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import paddle import paddle.nn as nn import paddle.nn.functional as F from paddle import ParamAttr from paddle.regularizer import L2Decay from paddle.nn.initializer import KaimingNormal from ppdet.core.workspace import register, serializable from numbers import Integral from ..shape_spec import ShapeSpec __all__ = ['BlazeNet'] def hard_swish(x): return x * F.relu6(x + 3) / 6. class ConvBNLayer(nn.Layer): def __init__(self, in_channels, out_channels, kernel_size, stride, padding, num_groups=1, act='relu', conv_lr=0.1, conv_decay=0., norm_decay=0., norm_type='bn', name=None): super(ConvBNLayer, self).__init__() self.act = act self._conv = nn.Conv2D( in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=num_groups, weight_attr=ParamAttr( learning_rate=conv_lr, initializer=KaimingNormal(), name=name + "_weights"), bias_attr=False) param_attr = ParamAttr(name=name + "_bn_scale") bias_attr = ParamAttr(name=name + "_bn_offset") if norm_type == 'sync_bn': self._batch_norm = nn.SyncBatchNorm( out_channels, weight_attr=param_attr, bias_attr=bias_attr) else: self._batch_norm = nn.BatchNorm( out_channels, act=None, param_attr=param_attr, bias_attr=bias_attr, use_global_stats=False, moving_mean_name=name + '_bn_mean', moving_variance_name=name + '_bn_variance') def forward(self, x): x = self._conv(x) x = self._batch_norm(x) if self.act == "relu": x = F.relu(x) elif self.act == "relu6": x = F.relu6(x) elif self.act == 'leaky': x = F.leaky_relu(x) elif self.act == 'hard_swish': x = hard_swish(x) return x class BlazeBlock(nn.Layer): def __init__(self, in_channels, out_channels1, out_channels2, double_channels=None, stride=1, use_5x5kernel=True, act='relu', name=None): super(BlazeBlock, self).__init__() assert stride in [1, 2] self.use_pool = not stride == 1 self.use_double_block = double_channels is not None self.conv_dw = [] if use_5x5kernel: self.conv_dw.append( self.add_sublayer( name + "1_dw", ConvBNLayer( in_channels=in_channels, out_channels=out_channels1, kernel_size=5, stride=stride, padding=2, num_groups=out_channels1, name=name + "1_dw"))) else: self.conv_dw.append( self.add_sublayer( name + "1_dw_1", ConvBNLayer( in_channels=in_channels, out_channels=out_channels1, kernel_size=3, stride=1, padding=1, num_groups=out_channels1, name=name + "1_dw_1"))) self.conv_dw.append( self.add_sublayer( name + "1_dw_2", ConvBNLayer( in_channels=out_channels1, out_channels=out_channels1, kernel_size=3, stride=stride, padding=1, num_groups=out_channels1, name=name + "1_dw_2"))) self.act = act if self.use_double_block else None self.conv_pw = ConvBNLayer( in_channels=out_channels1, out_channels=out_channels2, kernel_size=1, stride=1, padding=0, act=self.act, name=name + "1_sep") if self.use_double_block: self.conv_dw2 = [] if use_5x5kernel: self.conv_dw2.append( self.add_sublayer( name + "2_dw", ConvBNLayer( in_channels=out_channels2, out_channels=out_channels2, kernel_size=5, stride=1, padding=2, num_groups=out_channels2, name=name + "2_dw"))) else: self.conv_dw2.append( self.add_sublayer( name + "2_dw_1", ConvBNLayer( in_channels=out_channels2, out_channels=out_channels2, kernel_size=3, stride=1, padding=1, num_groups=out_channels2, name=name + "1_dw_1"))) self.conv_dw2.append( self.add_sublayer( name + "2_dw_2", ConvBNLayer( in_channels=out_channels2, out_channels=out_channels2, kernel_size=3, stride=1, padding=1, num_groups=out_channels2, name=name + "2_dw_2"))) self.conv_pw2 = ConvBNLayer( in_channels=out_channels2, out_channels=double_channels, kernel_size=1, stride=1, padding=0, name=name + "2_sep") # shortcut if self.use_pool: shortcut_channel = double_channels or out_channels2 self._shortcut = [] self._shortcut.append( self.add_sublayer( name + '_shortcut_pool', nn.MaxPool2D( kernel_size=stride, stride=stride, ceil_mode=True))) self._shortcut.append( self.add_sublayer( name + '_shortcut_conv', ConvBNLayer( in_channels=in_channels, out_channels=shortcut_channel, kernel_size=1, stride=1, padding=0, name="shortcut" + name))) def forward(self, x): y = x for conv_dw_block in self.conv_dw: y = conv_dw_block(y) y = self.conv_pw(y) if self.use_double_block: for conv_dw2_block in self.conv_dw2: y = conv_dw2_block(y) y = self.conv_pw2(y) if self.use_pool: for shortcut in self._shortcut: x = shortcut(x) return F.relu(paddle.add(x, y)) @register @serializable class BlazeNet(nn.Layer): """ BlazeFace, see https://arxiv.org/abs/1907.05047 Args: blaze_filters (list): number of filter for each blaze block. double_blaze_filters (list): number of filter for each double_blaze block. use_5x5kernel (bool): whether or not filter size is 5x5 in depth-wise conv. """ def __init__( self, blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]], double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96], [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]], use_5x5kernel=True, act=None): super(BlazeNet, self).__init__() conv1_num_filters = blaze_filters[0][0] self.conv1 = ConvBNLayer( in_channels=3, out_channels=conv1_num_filters, kernel_size=3, stride=2, padding=1, name="conv1") in_channels = conv1_num_filters self.blaze_block = [] self._out_channels = [] for k, v in enumerate(blaze_filters): assert len(v) in [2, 3], \ "blaze_filters {} not in [2, 3]" if len(v) == 2: self.blaze_block.append( self.add_sublayer( 'blaze_{}'.format(k), BlazeBlock( in_channels, v[0], v[1], use_5x5kernel=use_5x5kernel, act=act, name='blaze_{}'.format(k)))) elif len(v) == 3: self.blaze_block.append( self.add_sublayer( 'blaze_{}'.format(k), BlazeBlock( in_channels, v[0], v[1], stride=v[2], use_5x5kernel=use_5x5kernel, act=act, name='blaze_{}'.format(k)))) in_channels = v[1] for k, v in enumerate(double_blaze_filters): assert len(v) in [3, 4], \ "blaze_filters {} not in [3, 4]" if len(v) == 3: self.blaze_block.append( self.add_sublayer( 'double_blaze_{}'.format(k), BlazeBlock( in_channels, v[0], v[1], double_channels=v[2], use_5x5kernel=use_5x5kernel, act=act, name='double_blaze_{}'.format(k)))) elif len(v) == 4: self.blaze_block.append( self.add_sublayer( 'double_blaze_{}'.format(k), BlazeBlock( in_channels, v[0], v[1], double_channels=v[2], stride=v[3], use_5x5kernel=use_5x5kernel, act=act, name='double_blaze_{}'.format(k)))) in_channels = v[2] self._out_channels.append(in_channels) def forward(self, inputs): outs = [] y = self.conv1(inputs['image']) for block in self.blaze_block: y = block(y) outs.append(y) return [outs[-4], outs[-1]] @property def out_shape(self): return [ ShapeSpec(channels=c) for c in [self._out_channels[-4], self._out_channels[-1]] ]