# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn.initializer import MSRA
from paddle.nn import Conv2d, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
from paddle.regularizer import L2Decay

__all__ = ["ResNeSt50_fast_1s1x64d", "ResNeSt50"]


class ConvBNLayer(nn.Layer):
    """Conv2d followed by BatchNorm; the BN scale and offset are excluded
    from weight decay."""

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        bn_decay = 0.0

        self._conv = Conv2d(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            dilation=dilation,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weight"),
            bias_attr=False)
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(
                name=name + "_scale", regularizer=L2Decay(bn_decay)),
            bias_attr=ParamAttr(
                name + "_offset", regularizer=L2Decay(bn_decay)),
            moving_mean_name=name + "_mean",
            moving_variance_name=name + "_variance")

    def forward(self, x):
        x = self._conv(x)
        x = self._batch_norm(x)
        return x


class rSoftmax(nn.Layer):
    """Radix-wise softmax over the split-attention logits; falls back to
    sigmoid when radix == 1."""

    def __init__(self, radix, cardinality):
        super(rSoftmax, self).__init__()
        self.radix = radix
        self.cardinality = cardinality

    def forward(self, x):
        cardinality = self.cardinality
        radix = self.radix

        batch, r, h, w = x.shape
        if self.radix > 1:
            # group channels as [batch, cardinality, radix, rest] and apply
            # softmax over the radix axis
            x = paddle.reshape(
                x=x,
                shape=[
                    0, cardinality, radix,
                    int(r * h * w / cardinality / radix)
                ])
            x = paddle.transpose(x=x, perm=[0, 2, 1, 3])
            x = nn.functional.softmax(x, axis=1)
            x = paddle.reshape(x=x, shape=[0, r * h * w])
        else:
            x = nn.functional.sigmoid(x)
        return x


class SplatConv(nn.Layer):
    """Split-Attention convolution: the grouped conv output is split into
    `radix` branches which are re-weighted by attention computed from the
    pooled sum of the branches."""

    def __init__(self,
                 in_channels,
                 channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 radix=2,
                 reduction_factor=4,
                 rectify_avg=False,
                 name=None):
        super(SplatConv, self).__init__()

        self.radix = radix

        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            num_filters=channels * radix,
            filter_size=kernel_size,
            stride=stride,
            groups=groups * radix,
            act="relu",
            name=name + "_splat1")

        self.avg_pool2d = AdaptiveAvgPool2d(1)

        inter_channels = int(max(in_channels * radix // reduction_factor, 32))

        # to calc gap
        self.conv2 = ConvBNLayer(
            num_channels=channels,
            num_filters=inter_channels,
            filter_size=1,
            stride=1,
            groups=groups,
            act="relu",
            name=name + "_splat2")

        # to calc atten
        self.conv3 = Conv2d(
            in_channels=inter_channels,
            out_channels=channels * radix,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_splat_weights", initializer=MSRA()),
            bias_attr=False)

        self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)

    def forward(self, x):
        x = self.conv1(x)

        if self.radix > 1:
            splited = paddle.split(x, num_or_sections=self.radix, axis=1)
            # sum over the radix splits before global pooling
            gap = paddle.add_n(splited)
        else:
            gap = x

        gap = self.avg_pool2d(gap)
        gap = self.conv2(gap)
        atten = self.conv3(gap)
        atten = self.rsoftmax(atten)
        atten = paddle.reshape(x=atten, shape=[-1, atten.shape[1], 1, 1])

        if self.radix > 1:
            attens = paddle.split(atten, num_or_sections=self.radix, axis=1)
            y = paddle.add_n(
                [att * split for (att, split) in zip(attens, splited)])
        else:
            y = atten * x

        return y


class BottleneckBlock(nn.Layer):
    """ResNeSt bottleneck block: 1x1 conv, split-attention 3x3 conv and 1x1
    conv, with optional average-pool downsampling (avd) and an average-pool
    shortcut (avg_down)."""

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 radix=1,
                 cardinality=1,
                 bottleneck_width=64,
                 avd=False,
                 avd_first=False,
                 dilation=1,
                 is_first=False,
                 rectify_avg=False,
                 last_gamma=False,
                 avg_down=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.inplanes = inplanes
        self.planes = planes
        self.stride = stride
        self.radix = radix
        self.cardinality = cardinality
        self.avd = avd
        self.avd_first = avd_first
        self.dilation = dilation
        self.is_first = is_first
        self.rectify_avg = rectify_avg
        self.last_gamma = last_gamma
        self.avg_down = avg_down

        group_width = int(planes * (bottleneck_width / 64.)) * cardinality

        self.conv1 = ConvBNLayer(
            num_channels=self.inplanes,
            num_filters=group_width,
            filter_size=1,
            stride=1,
            groups=1,
            act="relu",
            name=name + "_conv1")

        if avd and avd_first and (stride > 1 or is_first):
            self.avg_pool2d_1 = AvgPool2d(
                kernel_size=3, stride=stride, padding=1)

        if radix >= 1:
            self.conv2 = SplatConv(
                in_channels=group_width,
                channels=group_width,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation,
                groups=cardinality,
                bias=False,
                radix=radix,
                rectify_avg=rectify_avg,
                name=name + "_splatconv")
        else:
            self.conv2 = ConvBNLayer(
                num_channels=group_width,
                num_filters=group_width,
                filter_size=3,
                stride=1,
                dilation=dilation,
                groups=cardinality,
                act="relu",
                name=name + "_conv2")

        if avd and not avd_first and (stride > 1 or is_first):
            self.avg_pool2d_2 = AvgPool2d(
                kernel_size=3, stride=stride, padding=1)

        self.conv3 = ConvBNLayer(
            num_channels=group_width,
            num_filters=planes * 4,
            filter_size=1,
            stride=1,
            groups=1,
            act=None,
            name=name + "_conv3")

        # projection shortcut (ResNet-D style when avg_down is enabled)
        if stride != 1 or self.inplanes != self.planes * 4:
            if avg_down:
                if dilation == 1:
                    self.avg_pool2d_3 = AvgPool2d(
                        kernel_size=stride, stride=stride, padding=0)
                else:
                    self.avg_pool2d_3 = AvgPool2d(
                        kernel_size=1, stride=1, padding=0, ceil_mode=True)

                self.conv4 = Conv2d(
                    in_channels=self.inplanes,
                    out_channels=planes * 4,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    groups=1,
                    weight_attr=ParamAttr(
                        name=name + "_weights", initializer=MSRA()),
                    bias_attr=False)
            else:
                self.conv4 = Conv2d(
                    in_channels=self.inplanes,
                    out_channels=planes * 4,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    groups=1,
                    weight_attr=ParamAttr(
                        name=name + "_shortcut_weights", initializer=MSRA()),
                    bias_attr=False)

            bn_decay = 0.0
            self._batch_norm = BatchNorm(
                planes * 4,
                act=None,
                param_attr=ParamAttr(
                    name=name + "_shortcut_scale",
                    regularizer=L2Decay(bn_decay)),
                bias_attr=ParamAttr(
                    name + "_shortcut_offset",
                    regularizer=L2Decay(bn_decay)),
                moving_mean_name=name + "_shortcut_mean",
                moving_variance_name=name + "_shortcut_variance")

    def forward(self, x):
        short = x

        x = self.conv1(x)
        if self.avd and self.avd_first and (self.stride > 1 or self.is_first):
            x = self.avg_pool2d_1(x)

        x = self.conv2(x)
        if self.avd and not self.avd_first and (self.stride > 1 or
                                                self.is_first):
            x = self.avg_pool2d_2(x)

        x = self.conv3(x)

        if self.stride != 1 or self.inplanes != self.planes * 4:
            if self.avg_down:
                short = self.avg_pool2d_3(short)
            short = self.conv4(short)
            short = self._batch_norm(short)

        y = paddle.elementwise_add(x=short, y=x, act="relu")
        return y


class ResNeStLayer(nn.Layer):
    """Stack of `blocks` BottleneckBlocks; the first block performs the
    stride / dilation / shortcut transition, the rest keep the resolution."""

    def __init__(self,
                 inplanes,
                 planes,
                 blocks,
                 radix,
                 cardinality,
                 bottleneck_width,
                 avg_down,
                 avd,
                 avd_first,
                 rectify_avg,
                 last_gamma,
                 stride=1,
                 dilation=1,
                 is_first=True,
                 name=None):
        super(ResNeStLayer, self).__init__()
        self.inplanes = inplanes
        self.planes = planes
        self.blocks = blocks
        self.radix = radix
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.avg_down = avg_down
        self.avd = avd
        self.avd_first = avd_first
        self.rectify_avg = rectify_avg
        self.last_gamma = last_gamma
        self.is_first = is_first

        if dilation == 1 or dilation == 2:
            bottleneck_func = self.add_sublayer(
                name + "_bottleneck_0",
                BottleneckBlock(
                    inplanes=self.inplanes,
                    planes=planes,
                    stride=stride,
                    radix=radix,
                    cardinality=cardinality,
                    bottleneck_width=bottleneck_width,
                    avg_down=self.avg_down,
                    avd=avd,
                    avd_first=avd_first,
                    dilation=1,
                    is_first=is_first,
                    rectify_avg=rectify_avg,
                    last_gamma=last_gamma,
                    name=name + "_bottleneck_0"))
        elif dilation == 4:
            bottleneck_func = self.add_sublayer(
                name + "_bottleneck_0",
                BottleneckBlock(
                    inplanes=self.inplanes,
                    planes=planes,
                    stride=stride,
                    radix=radix,
                    cardinality=cardinality,
                    bottleneck_width=bottleneck_width,
                    avg_down=self.avg_down,
                    avd=avd,
                    avd_first=avd_first,
                    dilation=2,
                    is_first=is_first,
                    rectify_avg=rectify_avg,
                    last_gamma=last_gamma,
                    name=name + "_bottleneck_0"))
        else:
            raise RuntimeError("=>unknown dilation size")

        self.inplanes = planes * 4
        self.bottleneck_block_list = [bottleneck_func]
        for i in range(1, blocks):
            name = name + "_bottleneck_" + str(i)
            bottleneck_func = self.add_sublayer(
                name,
                BottleneckBlock(
                    inplanes=self.inplanes,
                    planes=planes,
                    radix=radix,
                    cardinality=cardinality,
                    bottleneck_width=bottleneck_width,
                    avg_down=self.avg_down,
                    avd=avd,
                    avd_first=avd_first,
                    dilation=dilation,
                    rectify_avg=rectify_avg,
                    last_gamma=last_gamma,
                    name=name))
            self.bottleneck_block_list.append(bottleneck_func)

    def forward(self, x):
        for bottleneck_block in self.bottleneck_block_list:
            x = bottleneck_block(x)
        return x


class ResNeSt(nn.Layer):
    """ResNeSt backbone: (deep) stem, max-pool, four ResNeSt stages, global
    average pooling and a fully connected classification head."""

    def __init__(self,
                 layers,
                 radix=1,
                 groups=1,
                 bottleneck_width=64,
                 dilated=False,
                 dilation=1,
                 deep_stem=False,
                 stem_width=64,
                 avg_down=False,
                 rectify_avg=False,
                 avd=False,
                 avd_first=False,
                 final_drop=0.0,
                 last_gamma=False,
                 class_dim=1000):
        super(ResNeSt, self).__init__()

        self.cardinality = groups
        self.bottleneck_width = bottleneck_width
        # ResNet-D params
        self.inplanes = stem_width * 2 if deep_stem else 64
        self.avg_down = avg_down
        self.last_gamma = last_gamma
        # ResNeSt params
        self.radix = radix
        self.avd = avd
        self.avd_first = avd_first

        self.deep_stem = deep_stem
        self.stem_width = stem_width
        self.layers = layers
        self.final_drop = final_drop
        self.dilated = dilated
        self.dilation = dilation
        self.rectify_avg = rectify_avg

        if self.deep_stem:
            self.stem = nn.Sequential(
                ("conv1", ConvBNLayer(
                    num_channels=3,
                    num_filters=stem_width,
                    filter_size=3,
                    stride=2,
                    act="relu",
                    name="conv1")),
                ("conv2", ConvBNLayer(
                    num_channels=stem_width,
                    num_filters=stem_width,
                    filter_size=3,
                    stride=1,
                    act="relu",
                    name="conv2")),
                ("conv3", ConvBNLayer(
                    num_channels=stem_width,
                    num_filters=stem_width * 2,
                    filter_size=3,
                    stride=1,
                    act="relu",
                    name="conv3")))
        else:
            self.stem = ConvBNLayer(
                num_channels=3,
                num_filters=stem_width,
                filter_size=7,
                stride=2,
                act="relu",
                name="conv1")

        self.max_pool2d = MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = ResNeStLayer(
            inplanes=self.stem_width * 2 if self.deep_stem else self.stem_width,
            planes=64,
            blocks=self.layers[0],
            radix=radix,
            cardinality=self.cardinality,
            bottleneck_width=bottleneck_width,
            avg_down=self.avg_down,
            avd=avd,
            avd_first=avd_first,
            rectify_avg=rectify_avg,
            last_gamma=last_gamma,
            stride=1,
            dilation=1,
            is_first=False,
            name="layer1")

        self.layer2 = ResNeStLayer(
            inplanes=256,
            planes=128,
            blocks=self.layers[1],
            radix=radix,
            cardinality=self.cardinality,
            bottleneck_width=bottleneck_width,
            avg_down=self.avg_down,
            avd=avd,
            avd_first=avd_first,
            rectify_avg=rectify_avg,
            last_gamma=last_gamma,
            stride=2,
            name="layer2")

        if self.dilated or self.dilation == 4:
            self.layer3 = ResNeStLayer(
                inplanes=512,
                planes=256,
                blocks=self.layers[2],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=1,
                dilation=2,
                name="layer3")
            self.layer4 = ResNeStLayer(
                inplanes=1024,
                planes=512,
                blocks=self.layers[3],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=1,
                dilation=4,
                name="layer4")
        elif self.dilation == 2:
            self.layer3 = ResNeStLayer(
                inplanes=512,
                planes=256,
                blocks=self.layers[2],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=2,
                dilation=1,
                name="layer3")
            self.layer4 = ResNeStLayer(
                inplanes=1024,
                planes=512,
                blocks=self.layers[3],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=1,
                dilation=2,
                name="layer4")
        else:
            self.layer3 = ResNeStLayer(
                inplanes=512,
                planes=256,
                blocks=self.layers[2],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=2,
                name="layer3")
            self.layer4 = ResNeStLayer(
                inplanes=1024,
                planes=512,
                blocks=self.layers[3],
                radix=radix,
                cardinality=self.cardinality,
                bottleneck_width=bottleneck_width,
                avg_down=self.avg_down,
                avd=avd,
                avd_first=avd_first,
                rectify_avg=rectify_avg,
                last_gamma=last_gamma,
                stride=2,
                name="layer4")

        self.pool2d_avg = AdaptiveAvgPool2d(1)

        self.out_channels = 2048

        stdv = 1.0 / math.sqrt(self.out_channels * 1.0)

        self.out = Linear(
            self.out_channels,
            class_dim,
            weight_attr=ParamAttr(
                initializer=nn.initializer.Uniform(-stdv, stdv),
                name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))

    def forward(self, x):
        x = self.stem(x)
        x = self.max_pool2d(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool2d_avg(x)
        x = paddle.reshape(x, shape=[-1, self.out_channels])
        x = self.out(x)
        return x


def ResNeSt50_fast_1s1x64d(**args):
    model = ResNeSt(
        layers=[3, 4, 6, 3],
        radix=1,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=32,
        avg_down=True,
        avd=True,
        avd_first=True,
        final_drop=0.0,
        **args)
    return model


def ResNeSt50(**args):
    model = ResNeSt(
        layers=[3, 4, 6, 3],
        radix=2,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=32,
        avg_down=True,
        avd=True,
        avd_first=False,
        final_drop=0.0,
        **args)
    return model
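

# Minimal smoke-test sketch (not part of the upstream module): it builds both
# exported variants and runs one forward pass on a random batch. It assumes
# the same Paddle 2.0-rc era API that the rest of this file targets
# (Conv2d / MaxPool2d style names); the 224 x 224 input size and batch size
# of 1 are only illustrative choices.
if __name__ == "__main__":
    for build_fn in (ResNeSt50_fast_1s1x64d, ResNeSt50):
        model = build_fn(class_dim=1000)
        model.eval()
        dummy = paddle.to_tensor(
            np.random.rand(1, 3, 224, 224).astype("float32"))
        logits = model(dummy)
        # expected output shape: [1, 1000]
        print(build_fn.__name__, logits.shape)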