From a950ec42b0d3283edf052df827ad7bda4b90b00d Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Thu, 10 Jun 2021 23:15:40 +0800 Subject: [PATCH] add the codes of TNT, HarDNet, RedNet and DLA models --- ppcls/arch/backbone/__init__.py | 4 + ppcls/arch/backbone/model_zoo/dla.py | 451 +++++++++++++++++++++++ ppcls/arch/backbone/model_zoo/hardnet.py | 248 +++++++++++++ ppcls/arch/backbone/model_zoo/rednet.py | 189 ++++++++++ ppcls/arch/backbone/model_zoo/tnt.py | 301 +++++++++++++++ 5 files changed, 1193 insertions(+) create mode 100644 ppcls/arch/backbone/model_zoo/dla.py create mode 100644 ppcls/arch/backbone/model_zoo/hardnet.py create mode 100644 ppcls/arch/backbone/model_zoo/rednet.py create mode 100644 ppcls/arch/backbone/model_zoo/tnt.py diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py index de00c2a2..256a9950 100644 --- a/ppcls/arch/backbone/__init__.py +++ b/ppcls/arch/backbone/__init__.py @@ -47,4 +47,8 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384 from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0 +from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169 +from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152 +from ppcls.arch.backbone.model_zoo.tnt import TNT_small +from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds from ppcls.arch.backbone.variant_models.resnet_variant import 
ResNet50_last_stage_stride1 diff --git a/ppcls/arch/backbone/model_zoo/dla.py b/ppcls/arch/backbone/model_zoo/dla.py new file mode 100644 index 00000000..3b75bf27 --- /dev/null +++ b/ppcls/arch/backbone/model_zoo/dla.py @@ -0,0 +1,451 @@ +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddle.nn.initializer import Normal, Constant + +from ppcls.arch.backbone.base.theseus_layer import Identity +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + + +MODEL_URLS = { + "DLA34": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA34_pretrained.pdparams", + "DLA46_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46_c_pretrained.pdparams", + "DLA46x_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46x_c_pretrained.pdparams", + "DLA60": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60_pretrained.pdparams", + "DLA60x": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_pretrained.pdparams", + "DLA60x_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_c_pretrained.pdparams", + "DLA102": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102_pretrained.pdparams", + "DLA102x": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x_pretrained.pdparams", + "DLA102x2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x2_pretrained.pdparams", + "DLA169": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA169_pretrained.pdparams" +} + + +__all__ = MODEL_URLS.keys() + + +zeros_ = Constant(value=0.) +ones_ = Constant(value=1.) 
class DlaBasic(nn.Layer):
    """Residual block made of two 3x3 conv + batch-norm layers."""

    def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
        # **cargs absorbs the options (cardinality, base_width) that DlaTree
        # forwards to every block type but that this block does not use.
        super(DlaBasic, self).__init__()
        self.conv1 = nn.Conv2D(
            inplanes, planes, kernel_size=3, stride=stride,
            padding=dilation, bias_attr=False, dilation=dilation
        )
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2D(
            planes, planes, kernel_size=3, stride=1,
            padding=dilation, bias_attr=False, dilation=dilation
        )
        self.bn2 = nn.BatchNorm2D(planes)
        self.stride = stride

    def forward(self, x, residual=None):
        """Run the block; ``residual`` defaults to the input ``x``."""
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += residual
        out = self.relu(out)

        return out


class DlaBottleneck(nn.Layer):
    """Residual 1x1 -> 3x3 (grouped) -> 1x1 bottleneck block."""

    expansion = 2

    def __init__(self, inplanes, outplanes, stride=1,
                 dilation=1, cardinality=1, base_width=64):
        super(DlaBottleneck, self).__init__()
        self.stride = stride
        # Width of the inner 3x3 conv, scaled by base_width / cardinality and
        # then divided by the class-level expansion factor.
        mid_planes = int(math.floor(
            outplanes * (base_width / 64)) * cardinality)
        mid_planes = mid_planes // self.expansion

        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm2D(mid_planes)
        self.conv2 = nn.Conv2D(
            mid_planes, mid_planes, kernel_size=3,
            stride=stride, padding=dilation, bias_attr=False,
            dilation=dilation, groups=cardinality
        )
        self.bn2 = nn.BatchNorm2D(mid_planes)
        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(outplanes)
        self.relu = nn.ReLU()

    def forward(self, x, residual=None):
        """Run the block; ``residual`` defaults to the input ``x``."""
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out


class DlaRoot(nn.Layer):
    """Aggregation node: concat the inputs on axis 1, then 1x1 conv + BN + ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(DlaRoot, self).__init__()
        # NOTE(review): the conv kernel is fixed at 1 while the padding is
        # derived from ``kernel_size``; every caller in this patch passes 1,
        # so padding is 0. Confirm before using another kernel size.
        self.conv = nn.Conv2D(
            in_channels, out_channels, 1, stride=1,
            bias_attr=False, padding=(kernel_size - 1) // 2
        )
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        self.residual = residual

    def forward(self, *x):
        """Fuse all positional inputs; optionally add the first one back."""
        children = x
        x = self.conv(paddle.concat(x, 1))
        x = self.bn(x)
        if self.residual:
            x += children[0]
        x = self.relu(x)

        return x


class DlaTree(nn.Layer):
    """Recursive tree of blocks; leaves (levels == 1) own a DlaRoot node."""

    def __init__(self, levels, block, in_channels, out_channels,
                 stride=1, dilation=1, cardinality=1, base_width=64,
                 level_root=False, root_dim=0, root_kernel_size=1,
                 root_residual=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            # The downsampled input is appended to the root's inputs too.
            root_dim += in_channels

        self.downsample = nn.MaxPool2D(
            stride, stride=stride) if stride > 1 else Identity()
        self.project = Identity()
        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)

        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
            if in_channels != out_channels:
                # 1x1 projection so the residual matches out_channels.
                self.project = nn.Sequential(
                    nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
                    nn.BatchNorm2D(out_channels))
        else:
            cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
            self.tree1 = DlaTree(
                levels - 1, block, in_channels,
                out_channels, stride, root_dim=0, **cargs
            )
            self.tree2 = DlaTree(
                levels - 1, block, out_channels,
                out_channels, root_dim=root_dim + out_channels, **cargs
            )

        if levels == 1:
            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)

        self.level_root = level_root
        self.root_dim = root_dim
        self.levels = levels

    def forward(self, x, residual=None, children=None):
        """Run the tree.

        The incoming ``residual`` argument is overwritten with the projected,
        downsampled input. ``children`` collects the tensors handed to the
        root node and is created fresh when not supplied.
        """
        children = [] if children is None else children
        bottom = self.downsample(x)
        residual = self.project(bottom)

        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)

        if self.levels == 1:
            x2 = self.tree2(x1)
            x = self.root(x2, x1, *children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x


class DLA(nn.Layer):
    """DLA backbone: a stem, two plain conv levels and four DlaTree levels.

    Args:
        levels: six entries; conv counts for level0/1, tree depths for level2-5.
        channels: six entries; output channels of level0-5.
        in_chans: input image channels.
        cardinality, base_width: forwarded to the blocks.
        block: block class used inside the trees.
        residual_root: forwarded to DlaTree as ``root_residual``.
        drop_rate: dropout probability applied before the classifier.
        class_dim: number of classes; the classifier is built only if > 0.
        with_pool: whether to build/apply global average pooling.
    """

    def __init__(self, levels, channels, in_chans=3, cardinality=1,
                 base_width=64, block=DlaBottleneck, residual_root=False,
                 drop_rate=0.0, class_dim=1000, with_pool=True):
        super(DLA, self).__init__()
        self.channels = channels
        self.class_dim = class_dim
        self.with_pool = with_pool
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate

        self.base_layer = nn.Sequential(
            nn.Conv2D(
                in_chans, channels[0], kernel_size=7,
                stride=1, padding=3, bias_attr=False
            ),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU())

        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)

        cargs = dict(
            cardinality=cardinality,
            base_width=base_width,
            root_residual=residual_root
        )

        self.level2 = DlaTree(
            levels[2], block, channels[1],
            channels[2], 2, level_root=False, **cargs
        )
        self.level3 = DlaTree(
            levels[3], block, channels[2],
            channels[3], 2, level_root=True, **cargs
        )
        self.level4 = DlaTree(
            levels[4], block, channels[3],
            channels[4], 2, level_root=True, **cargs
        )
        self.level5 = DlaTree(
            levels[5], block, channels[4],
            channels[5], 2, level_root=True, **cargs
        )

        self.feature_info = [
            # rare to have a meaningful stride 1 level
            dict(num_chs=channels[0], reduction=1, module='level0'),
            dict(num_chs=channels[1], reduction=2, module='level1'),
            dict(num_chs=channels[2], reduction=4, module='level2'),
            dict(num_chs=channels[3], reduction=8, module='level3'),
            dict(num_chs=channels[4], reduction=16, module='level4'),
            dict(num_chs=channels[5], reduction=32, module='level5'),
        ]

        self.num_features = channels[-1]

        if with_pool:
            self.global_pool = nn.AdaptiveAvgPool2D(1)

        if class_dim > 0:
            # The classifier is a 1x1 conv; forward() flattens its output.
            self.fc = nn.Conv2D(self.num_features, class_dim, 1)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm weight = 1, bias = 0.
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
                normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                ones_(m.weight)
                zeros_(m.bias)

    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        """Stack ``convs`` 3x3 conv + BN + ReLU groups; only the first strides."""
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2D(
                    inplanes, planes, kernel_size=3,
                    stride=stride if i == 0 else 1,
                    padding=dilation, bias_attr=False, dilation=dilation
                ),
                nn.BatchNorm2D(planes),
                nn.ReLU()])
            inplanes = planes
        return nn.Sequential(*modules)

    def forward_features(self, x):
        """Run the stem and level0..level5 in order."""
        x = self.base_layer(x)

        x = self.level0(x)
        x = self.level1(x)
        x = self.level2(x)
        x = self.level3(x)
        x = self.level4(x)
        x = self.level5(x)

        return x

    def forward(self, x):
        """Features -> optional pool -> optional dropout -> optional classifier."""
        x = self.forward_features(x)

        if self.with_pool:
            x = self.global_pool(x)

        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)

        if self.class_dim > 0:
            x = self.fc(x)
            x = x.flatten(1)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    """Load weights into ``model`` according to ``pretrained``.

    ``False`` does nothing, ``True`` downloads from ``model_url`` and a string
    is passed to ``load_dygraph_pretrain`` as the weights location.

    Raises:
        RuntimeError: if ``pretrained`` is neither a bool nor a string.
    """
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def DLA34(pretrained=False, **kwargs):
    """Build DLA34 (DlaBasic blocks); extra kwargs go to ``DLA``."""
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 128, 256, 512),
        block=DlaBasic,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
    return model


def DLA46_c(pretrained=False, **kwargs):
    """Build DLA46_c (bottleneck blocks); extra kwargs go to ``DLA``."""
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
    return model


def DLA46x_c(pretrained=False, **kwargs):
    """Build DLA46x_c (cardinality 32, base width 4)."""
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
    return model


def DLA60(pretrained=False, **kwargs):
    """Build DLA60 (bottleneck blocks); extra kwargs go to ``DLA``."""
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
    return model


def DLA60x(pretrained=False, **kwargs):
    """Build DLA60x (cardinality 32, base width 4)."""
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
    return model


def DLA60x_c(pretrained=False, **kwargs):
    """Build DLA60x_c (compact channels, cardinality 32, base width 4)."""
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
    return model


def DLA102(pretrained=False, **kwargs):
    """Build DLA102 (bottleneck blocks, residual root)."""
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
    return model


def DLA102x(pretrained=False, **kwargs):
    """Build DLA102x (cardinality 32, base width 4, residual root)."""
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
    return model


def DLA102x2(pretrained=False, **kwargs):
    """Build DLA102x2 (cardinality 64, base width 4, residual root)."""
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=64,
        base_width=4,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
    return model


def DLA169(pretrained=False, **kwargs):
    """Build DLA169 (bottleneck blocks, residual root)."""
    model = DLA(
        levels=(1, 1, 2, 3, 5, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
    return model


# ---------------------------------------------------------------------------
# New file in this patch: ppcls/arch/backbone/model_zoo/hardnet.py
# (a separate module: MODEL_URLS and _load_pretrained below are its own)
# ---------------------------------------------------------------------------
import paddle
import paddle.nn as nn

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    'HarDNet39_ds':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet39_ds_pretrained.pdparams',
    'HarDNet68_ds':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_ds_pretrained.pdparams',
    'HarDNet68':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_pretrained.pdparams',
    'HarDNet85':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet85_pretrained.pdparams'
}


def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    """Return a named Sequential of Conv2D -> BatchNorm2D -> ReLU6."""
    layer = nn.Sequential(
        ('conv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=stride, padding=kernel_size // 2, groups=1, bias_attr=bias_attr
        )),
        ('norm', nn.BatchNorm2D(out_channels)),
        ('relu', nn.ReLU6())
    )
    return layer


def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    """Return a named Sequential of grouped Conv2D (groups=out_channels) -> BatchNorm2D."""
    layer = nn.Sequential(
        ('dwconv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size,
            # Derived from kernel_size (as in ConvLayer) instead of a fixed 1;
            # identical for the default kernel_size=3.
            stride=stride, padding=kernel_size // 2, groups=out_channels,
            bias_attr=bias_attr
        )),
        ('norm', nn.BatchNorm2D(out_channels))
    )
    return layer


def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
    """Return ConvLayer followed by DWConvLayer; ``stride`` applies to the latter."""
    layer = nn.Sequential(
        ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
        ('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
    )
    return layer


class HarDBlock(nn.Layer):
    """Block whose layer ``k`` reads the outputs ``k - 2**i`` for every ``2**i`` dividing ``k``."""

    def __init__(self, in_channels, growth_rate, grmul, n_layers,
                 keepBase=False, residual_out=False, dwconv=False):
        # ``residual_out`` is accepted but not used by this implementation.
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        layers_ = []
        self.out_channels = 0
        for i in range(n_layers):
            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate, grmul)
            self.links.append(link)
            if dwconv:
                layers_.append(CombConvLayer(inch, outch))
            else:
                layers_.append(ConvLayer(inch, outch))

            # Mirrors the selection in forward(): odd positions of the
            # [input, layer outputs...] list plus the last layer are emitted.
            if (i % 2 == 0) or (i == n_layers - 1):
                self.out_channels += outch
        self.layers = nn.LayerList(layers_)

    def get_link(self, layer, base_ch, growth_rate, grmul):
        """Return ``(out_channels, in_channels, link)`` for layer index ``layer``.

        Index 0 denotes the block input: ``(base_ch, 0, [])``.
        """
        if layer == 0:
            return base_ch, 0, []
        out_channels = growth_rate

        link = []
        for i in range(10):
            dv = 2 ** i
            if layer % dv == 0:
                k = layer - dv
                link.append(k)
                if i > 0:
                    out_channels *= grmul

        # Round to an even channel count.
        out_channels = int(int(out_channels + 1) / 2) * 2
        in_channels = 0

        for i in link:
            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
            in_channels += ch

        return out_channels, in_channels, link

    def forward(self, x):
        """Run all layers and concat the selected outputs on axis 1."""
        layers_ = [x]

        for link, layer in zip(self.links, self.layers):
            tin = [layers_[i] for i in link]
            x = paddle.concat(tin, 1) if len(tin) > 1 else tin[0]
            layers_.append(layer(x))

        t = len(layers_)
        out_ = [
            feat for i, feat in enumerate(layers_)
            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1)
        ]
        return paddle.concat(out_, 1)


class HarDNet(nn.Layer):
    """HarDNet classifier assembled as a flat ``LayerList`` run in order.

    Args:
        depth_wise: use CombConvLayer blocks and DWConv downsampling.
        arch: 85 or 39 select those configurations; any other value uses the
            HarDNet68 settings defined first.
        class_dim: number of classes; the linear head is built only if > 0.
        with_pool: whether to add global average pooling to the head.
    """

    def __init__(self, depth_wise=False, arch=85,
                 class_dim=1000, with_pool=True):
        super().__init__()
        first_ch = [32, 64]
        second_kernel = 3
        max_pool = True
        grmul = 1.7
        drop_rate = 0.1

        # HarDNet68
        ch_list = [128, 256, 320, 640, 1024]
        gr = [14, 16, 20, 40, 160]
        n_layers = [8, 16, 16, 16, 4]
        downSamp = [1, 0, 1, 1, 0]

        if arch == 85:
            # HarDNet85
            first_ch = [48, 96]
            ch_list = [192, 256, 320, 480, 720, 1280]
            gr = [24, 24, 28, 36, 48, 256]
            n_layers = [8, 16, 16, 16, 16, 4]
            downSamp = [1, 0, 1, 0, 1, 0]
            drop_rate = 0.2

        elif arch == 39:
            # HarDNet39
            first_ch = [24, 48]
            ch_list = [96, 320, 640, 1024]
            grmul = 1.6
            gr = [16, 20, 64, 160]
            n_layers = [4, 16, 8, 4]
            downSamp = [1, 1, 1, 0]

        if depth_wise:
            second_kernel = 1
            max_pool = False
            drop_rate = 0.05

        blks = len(n_layers)
        self.base = nn.LayerList([])

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
            ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
                      stride=2, bias_attr=False))

        # Second Layer
        self.base.append(
            ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))

        # Maxpooling or DWConv3x3 downsampling
        if max_pool:
            self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
        else:
            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))

        # Build all HarDNet blocks
        ch = first_ch[1]
        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
            ch = blk.out_channels
            self.base.append(blk)

            if i == blks - 1 and arch == 85:
                self.base.append(nn.Dropout(0.1))

            self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
            ch = ch_list[i]
            if downSamp[i] == 1:
                if max_pool:
                    self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
                else:
                    self.base.append(DWConvLayer(ch, ch, stride=2))

        ch = ch_list[blks - 1]

        layers = []

        if with_pool:
            layers.append(nn.AdaptiveAvgPool2D((1, 1)))

        if class_dim > 0:
            layers.append(nn.Flatten())
            layers.append(nn.Dropout(drop_rate))
            layers.append(nn.Linear(ch, class_dim))

        self.base.append(nn.Sequential(*layers))

    def forward(self, x):
        """Apply every entry of ``self.base`` in order."""
        for layer in self.base:
            x = layer(x)
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    """Load weights into ``model`` according to ``pretrained``.

    ``False`` does nothing, ``True`` downloads from ``model_url`` and a string
    is passed to ``load_dygraph_pretrain`` as the weights location.

    Raises:
        RuntimeError: if ``pretrained`` is neither a bool nor a string.
    """
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def HarDNet39_ds(pretrained=False, **kwargs):
    """Build HarDNet with arch=39 and depth-wise convs."""
    model = HarDNet(arch=39, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
    return model


def HarDNet68_ds(pretrained=False, **kwargs):
    """Build HarDNet with arch=68 and depth-wise convs."""
    model = HarDNet(arch=68, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
    return model


def HarDNet68(pretrained=False, **kwargs):
    """Build HarDNet with arch=68."""
    model = HarDNet(arch=68, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
    return model


def HarDNet85(pretrained=False, **kwargs):
    """Build HarDNet with arch=85."""
    model = HarDNet(arch=85, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
    return model


# ---------------------------------------------------------------------------
# New file in this patch: ppcls/arch/backbone/model_zoo/rednet.py
# (a separate module: MODEL_URLS and _load_pretrained below are its own)
# ---------------------------------------------------------------------------
import paddle
import paddle.nn as nn

from paddle.vision.models import resnet

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    "RedNet26":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet26_pretrained.pdparams",
    "RedNet38":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet38_pretrained.pdparams",
    "RedNet50":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet50_pretrained.pdparams",
    "RedNet101":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet101_pretrained.pdparams",
    "RedNet152":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet152_pretrained.pdparams"
}


class Involution(nn.Layer):
    """Involution: per-position kernels generated from the input itself.

    Channels are split into groups of 16 (``group_channels``); each group
    shares one generated ``kernel_size ** 2`` kernel per spatial position.
    """

    def __init__(self, channels, kernel_size, stride):
        super(Involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 16
        self.groups = self.channels // self.group_channels
        # Kernel generator: 1x1 reduce + BN + ReLU, then 1x1 expand.
        self.conv1 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels,
                out_channels=channels // reduction_ratio,
                kernel_size=1,
                bias_attr=False
            )),
            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
            ('activate', nn.ReLU())
        )
        self.conv2 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels // reduction_ratio,
                out_channels=kernel_size**2 * self.groups,
                kernel_size=1,
                stride=1
            ))
        )
        if stride > 1:
            self.avgpool = nn.AvgPool2D(stride, stride)

    def forward(self, x):
        """Generate kernels from ``x`` and apply them to its unfolded patches."""
        # For stride > 1 the kernels are generated from the pooled input.
        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
        b, c, h, w = weight.shape
        weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)

        out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size - 1) // 2, 1)
        out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w))
        # Weighted sum over the kernel-position axis.
        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
        return out


class BottleneckBlock(resnet.BottleneckBlock):
    """Paddle ResNet bottleneck whose ``conv2`` is replaced by a 7x7 Involution."""

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        super(BottleneckBlock, self).__init__(
            inplanes, planes, stride, downsample,
            groups, base_width, dilation, norm_layer
        )
        width = int(planes * (base_width / 64.)) * groups
        self.conv2 = Involution(width, 7, stride)


class RedNet(resnet.ResNet):
    """ResNet variant with an involution stem and involution bottlenecks.

    Args:
        block: block class passed to ``_make_layer``.
        depth: one of 26, 38, 50, 101, 152 (``KeyError`` otherwise).
        class_dim: number of classes, forwarded as ``num_classes``.
        with_pool: forwarded to the parent ResNet.
    """

    def __init__(self, block, depth, class_dim=1000, with_pool=True):
        # The parent is always built with depth=50; its stem attributes are
        # cleared and layer1..layer4 rebuilt below from ``layer_cfg[depth]``.
        super(RedNet, self).__init__(
            block=block, depth=50,
            num_classes=class_dim, with_pool=with_pool
        )
        layer_cfg = {
            26: [1, 2, 4, 1],
            38: [2, 3, 5, 2],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3]
        }
        layers = layer_cfg[depth]

        self.conv1 = None
        self.bn1 = None
        self.relu = None
        # Reset before rebuilding the stem and stages.
        self.inplanes = 64
        self.class_dim = class_dim
        self.stem = nn.Sequential(
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=3,
                    out_channels=self.inplanes // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    bias_attr=False
                )),
                ('bn', nn.BatchNorm2D(self.inplanes // 2)),
                ('activate', nn.ReLU())
            ),
            Involution(self.inplanes // 2, 3, 1),
            nn.BatchNorm2D(self.inplanes // 2),
            nn.ReLU(),
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=self.inplanes // 2,
                    out_channels=self.inplanes,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias_attr=False
                )),
                ('bn', nn.BatchNorm2D(self.inplanes)),
                ('activate', nn.ReLU())
            )
        )

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    def forward(self, x):
        """Stem -> maxpool -> layer1..4 -> optional pool -> optional classifier."""
        x = self.stem(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.with_pool:
            x = self.avgpool(x)

        if self.class_dim > 0:
            x = paddle.flatten(x, 1)
            x = self.fc(x)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    """Load weights into ``model`` according to ``pretrained``.

    ``False`` does nothing, ``True`` downloads from ``model_url`` and a string
    is passed to ``load_dygraph_pretrain`` as the weights location.

    Raises:
        RuntimeError: if ``pretrained`` is neither a bool nor a string.
    """
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def RedNet26(pretrained=False, **kwargs):
    """Build RedNet with depth 26."""
    model = RedNet(BottleneckBlock, 26, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
    return model


def RedNet38(pretrained=False, **kwargs):
    """Build RedNet with depth 38."""
    model = RedNet(BottleneckBlock, 38, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
    return model


def RedNet50(pretrained=False, **kwargs):
    """Build RedNet with depth 50."""
    model = RedNet(BottleneckBlock, 50, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
    return model


def RedNet101(pretrained=False, **kwargs):
    """Build RedNet with depth 101."""
    model = RedNet(BottleneckBlock, 101, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
    return model


def RedNet152(pretrained=False, **kwargs):
    """Build RedNet with depth 152."""
    model = RedNet(BottleneckBlock, 152, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
    return model


# ---------------------------------------------------------------------------
# New file in this patch: ppcls/arch/backbone/model_zoo/tnt.py
# (a separate module: MODEL_URLS below is its own)
# ---------------------------------------------------------------------------
import math
import numpy as np

import paddle
import paddle.nn as nn

from paddle.nn.initializer import TruncatedNormal, Constant

from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    "TNT_small":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/TNT_small_pretrained.pdparams"
}


trunc_normal_ = TruncatedNormal(std=.02)
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    The original name is misleading as 'Drop Connect' is a different form of
    dropout in a separate paper.
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956

    Returns ``x`` unchanged when ``drop_prob`` is ``None`` or 0, or when not
    training. Otherwise each sample (axis 0) is either zeroed or scaled by
    ``1 / keep_prob``.
    """
    # ``None`` is DropPath's default; treat it as "no drop" instead of
    # failing on ``1 - None``.
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    # Match x's dtype: drop_prob may be a NumPy float64 (from np.linspace).
    keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype)
    # One random value per sample, broadcast over the remaining axes.
    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    random_tensor = paddle.floor(random_tensor)  # binarize
    output = x.divide(keep_prob) * random_tensor
    return output


class DropPath(nn.Layer):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Mlp(nn.Layer):
    """Linear -> activation -> dropout -> Linear -> dropout."""

    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        # Both widths default to in_features when not given.
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Layer):
    """Multi-head self-attention with separate ``qk`` and ``v`` projections.

    Queries/keys are projected to ``hidden_dim`` and values to ``dim``; both
    are split across ``num_heads`` heads.
    """

    def __init__(self, dim, hidden_dim, num_heads=8,
                 qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        head_dim = hidden_dim // num_heads
        self.head_dim = head_dim
        self.scale = head_dim ** -0.5

        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        """Apply attention to ``x``, which is unpacked as ``(B, N, C)``."""
        B, N, C = x.shape
        # -> (2, B, num_heads, N, head_dim); index 0 is q, index 1 is k.
        qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))

        q, k = qk[0], qk[1]
        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))

        attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Layer):
    """TNT block: an inner transformer on pixel embeddings feeding an outer one on patch embeddings."""

    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
                 qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
            in_dim, in_dim, num_heads=in_num_head,
            qkv_bias=qkv_bias, attn_drop=attn_drop,
            proj_drop=drop
        )

        self.norm_mlp_in = norm_layer(in_dim)
        # The inner MLP's hidden width is fixed at 4 * in_dim; ``mlp_ratio``
        # only affects the outer MLP.
        self.mlp_in = Mlp(
            in_features=in_dim, hidden_features=int(in_dim * 4),
            out_features=in_dim, act_layer=act_layer, drop=drop
        )

        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim)
        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()

        self.norm_mlp = norm_layer(dim)
        self.mlp = Mlp(
            in_features=dim, hidden_features=int(dim * mlp_ratio),
            out_features=dim, act_layer=act_layer, drop=drop
        )

    def forward(self, pixel_embed, patch_embed):
        """Return the updated ``(pixel_embed, patch_embed)`` pair."""
        # inner
        pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
        pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
        # outer
        B, N, C = patch_embed.shape
        # Project pixel embeddings to one vector per patch and add them to
        # every token except the first. A new tensor is built with concat so
        # the caller's ``patch_embed`` is not modified in place.
        pixel_proj = self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
        patch_embed = paddle.concat(
            [patch_embed[:, :1], patch_embed[:, 1:] + pixel_proj], axis=1)
        patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
        patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
        return pixel_embed, patch_embed


class PixelEmbed(nn.Layer):
    """Conv-embed the image and unfold it into per-patch pixel sequences."""

    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2
        self.img_size = img_size
        self.num_patches = num_patches
        self.in_dim = in_dim
        # Side length of a patch after the strided conv.
        new_patch_size = math.ceil(patch_size / stride)
        self.new_patch_size = new_patch_size

        self.proj = nn.Conv2D(
            in_chans, self.in_dim,
            kernel_size=7, padding=3,
            stride=stride
        )

    def forward(self, x, pixel_pos):
        """Embed ``x`` (unpacked as ``B, C, H, W``) and add ``pixel_pos``.

        Returns a tensor reshaped to
        ``(B * num_patches, new_patch_size ** 2, in_dim)``.
        """
        B, C, H, W = x.shape
        assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."

        x = self.proj(x)
        x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
        x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
        x = x + pixel_pos
        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
        return x


class TNT(nn.Layer):
    """Transformer-in-Transformer classifier.

    ``forward`` returns the normalised first token of the patch sequence,
    passed through the linear head when ``class_dim > 0``.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12,
                 num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
                 drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
        super().__init__()
        self.class_dim = class_dim
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.pixel_embed = PixelEmbed(
            img_size=img_size, patch_size=patch_size,
            in_chans=in_chans, in_dim=in_dim, stride=first_stride
        )
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
        num_pixel = new_patch_size ** 2

        self.norm1_proj = norm_layer(num_pixel * in_dim)
        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
        self.norm2_proj = norm_layer(embed_dim)

        self.cls_token = self.create_parameter(
            shape=(1, 1, embed_dim),
            default_initializer=zeros_
        )
        self.add_parameter("cls_token", self.cls_token)

        self.patch_pos = self.create_parameter(
            shape=(1, num_patches + 1, embed_dim),
            default_initializer=zeros_
        )
        self.add_parameter("patch_pos", self.patch_pos)

        self.pixel_pos = self.create_parameter(
            shape=(1, in_dim, new_patch_size, new_patch_size),
            default_initializer=zeros_
        )
        self.add_parameter("pixel_pos", self.pixel_pos)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = np.linspace(0, drop_path_rate, depth)

        blocks = []
        for i in range(depth):
            blocks.append(Block(
                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads,
                in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
                norm_layer=norm_layer
            ))
        self.blocks = nn.LayerList(blocks)
        self.norm = norm_layer(embed_dim)

        if class_dim > 0:
            self.head = nn.Linear(embed_dim, class_dim)

        trunc_normal_(self.cls_token)
        trunc_normal_(self.patch_pos)
        trunc_normal_(self.pixel_pos)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Linear: truncated-normal weight, zero bias. LayerNorm: weight 1, bias 0."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        """Return the normalised first token of the patch sequence."""
        B = x.shape[0]
        pixel_embed = self.pixel_embed(x, self.pixel_pos)

        patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
        # Prepend the class token, then add position embeddings.
        patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
        patch_embed = patch_embed + self.patch_pos
        patch_embed = self.pos_drop(patch_embed)

        for blk in self.blocks:
            pixel_embed, patch_embed = blk(pixel_embed, patch_embed)

        patch_embed = self.norm(patch_embed)
        return patch_embed[:, 0]

    def forward(self, x):
        x = self.forward_features(x)

        if self.class_dim > 0:
            x = self.head(x)
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    """Load weights into ``model`` according to ``pretrained``.

    ``False`` does nothing, ``True`` downloads from ``model_url`` and a string
    is passed to ``load_dygraph_pretrain`` as the weights location.

    Raises:
        RuntimeError: if ``pretrained`` is neither a bool nor a string.
    """
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def TNT_small(pretrained=False, **kwargs):
    """Build TNT with embed_dim=384, in_dim=24, depth=12, 6 outer and 4 inner heads."""
    model = TNT(
        patch_size=16,
        embed_dim=384,
        in_dim=24,
        depth=12,
        num_heads=6,
        in_num_head=4,
        qkv_bias=False,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
    return model

# -- GitLab