Commit a950ec42 authored by jm_12138

Add the code for the TNT, HarDNet, RedNet and DLA models

Parent f4f09840
@@ -47,4 +47,8 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DLA34":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA34_pretrained.pdparams",
"DLA46_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46_c_pretrained.pdparams",
"DLA46x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46x_c_pretrained.pdparams",
"DLA60":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60_pretrained.pdparams",
"DLA60x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_pretrained.pdparams",
"DLA60x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_c_pretrained.pdparams",
"DLA102":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102_pretrained.pdparams",
"DLA102x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x_pretrained.pdparams",
"DLA102x2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x2_pretrained.pdparams",
"DLA169":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA169_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
class DlaBasic(nn.Layer):
def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
super(DlaBasic, self).__init__()
self.conv1 = nn.Conv2D(
inplanes, planes, kernel_size=3, stride=stride,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn1 = nn.BatchNorm2D(planes)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2D(
planes, planes, kernel_size=3, stride=1,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn2 = nn.BatchNorm2D(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class DlaBottleneck(nn.Layer):
expansion = 2
def __init__(self, inplanes, outplanes, stride=1,
dilation=1, cardinality=1, base_width=64):
super(DlaBottleneck, self).__init__()
self.stride = stride
mid_planes = int(math.floor(
outplanes * (base_width / 64)) * cardinality)
mid_planes = mid_planes // self.expansion
self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm2D(mid_planes)
self.conv2 = nn.Conv2D(
mid_planes, mid_planes, kernel_size=3,
stride=stride, padding=dilation, bias_attr=False,
dilation=dilation, groups=cardinality
)
self.bn2 = nn.BatchNorm2D(mid_planes)
self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
self.bn3 = nn.BatchNorm2D(outplanes)
self.relu = nn.ReLU()
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class DlaRoot(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(DlaRoot, self).__init__()
self.conv = nn.Conv2D(
in_channels, out_channels, 1, stride=1,
bias_attr=False, padding=(kernel_size - 1) // 2
)
self.bn = nn.BatchNorm2D(out_channels)
self.relu = nn.ReLU()
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(paddle.concat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class DlaTree(nn.Layer):
def __init__(self, levels, block, in_channels, out_channels,
stride=1,dilation=1, cardinality=1, base_width=64,
level_root=False, root_dim=0, root_kernel_size=1,
root_residual=False):
super(DlaTree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
self.downsample = nn.MaxPool2D(
stride, stride=stride) if stride > 1 else Identity()
self.project = Identity()
cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride, **cargs)
self.tree2 = block(out_channels, out_channels, 1, **cargs)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
nn.BatchNorm2D(out_channels))
else:
cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
self.tree1 = DlaTree(
levels - 1, block, in_channels,
out_channels, stride, root_dim=0, **cargs
)
self.tree2 = DlaTree(
levels - 1, block, out_channels,
out_channels, root_dim=root_dim + out_channels, **cargs
)
if levels == 1:
self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.levels = levels
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x)
residual = self.project(bottom)
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
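# Note on the aggregation structure: when levels == 1, tree1/tree2 are plain
# blocks and DlaRoot fuses their outputs together with any accumulated
# `children`; for levels > 1, tree1/tree2 are nested DlaTree instances, the
# intermediate outputs are pushed onto `children`, and the fusion is deferred
# to the innermost levels == 1 subtree.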
class DLA(nn.Layer):
def __init__(self, levels, channels, in_chans=3, cardinality=1,
base_width=64, block=DlaBottleneck, residual_root=False,
drop_rate=0.0, class_dim=1000, with_pool=True):
super(DLA, self).__init__()
self.channels = channels
self.class_dim = class_dim
self.with_pool = with_pool
self.cardinality = cardinality
self.base_width = base_width
self.drop_rate = drop_rate
self.base_layer = nn.Sequential(
nn.Conv2D(
in_chans, channels[0], kernel_size=7,
stride=1, padding=3, bias_attr=False
),
nn.BatchNorm2D(channels[0]),
nn.ReLU())
self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)
cargs = dict(
cardinality=cardinality,
base_width=base_width,
root_residual=residual_root
)
self.level2 = DlaTree(
levels[2], block, channels[1],
channels[2], 2, level_root=False, **cargs
)
self.level3 = DlaTree(
levels[3], block, channels[2],
channels[3], 2, level_root=True, **cargs
)
self.level4 = DlaTree(
levels[4], block, channels[3],
channels[4], 2, level_root=True, **cargs
)
self.level5 = DlaTree(
levels[5], block, channels[4],
channels[5], 2, level_root=True, **cargs
)
self.feature_info = [
# rare to have a meaningful stride 1 level
dict(num_chs=channels[0], reduction=1, module='level0'),
dict(num_chs=channels[1], reduction=2, module='level1'),
dict(num_chs=channels[2], reduction=4, module='level2'),
dict(num_chs=channels[3], reduction=8, module='level3'),
dict(num_chs=channels[4], reduction=16, module='level4'),
dict(num_chs=channels[5], reduction=32, module='level5'),
]
self.num_features = channels[-1]
if with_pool:
self.global_pool = nn.AdaptiveAvgPool2D(1)
if class_dim > 0:
self.fc = nn.Conv2D(self.num_features, class_dim, 1)
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
normal_(m.weight)
elif isinstance(m, nn.BatchNorm2D):
ones_(m.weight)
zeros_(m.bias)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2D(
inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias_attr=False, dilation=dilation
),
nn.BatchNorm2D(planes),
nn.ReLU()])
inplanes = planes
return nn.Sequential(*modules)
def forward_features(self, x):
x = self.base_layer(x)
x = self.level0(x)
x = self.level1(x)
x = self.level2(x)
x = self.level3(x)
x = self.level4(x)
x = self.level5(x)
return x
def forward(self, x):
x = self.forward_features(x)
if self.with_pool:
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
if self.class_dim > 0:
x = self.fc(x)
x = x.flatten(1)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DLA34(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 128, 256, 512),
block=DlaBasic,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
return model
def DLA46_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
return model
def DLA46x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
return model
def DLA60(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
return model
def DLA60x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
return model
def DLA60x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
return model
def DLA102(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
return model
def DLA102x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
return model
def DLA102x2(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=64,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
return model
def DLA169(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 2, 3, 5, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
return model
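# Minimal usage sketch for the DLA factories above, assuming a standard
# 224x224 ImageNet-style input and the default class_dim=1000 (the head is a
# 1x1 Conv2D followed by flatten):
if __name__ == "__main__":
    net = DLA34(pretrained=False)
    out = net(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 1000]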
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
'HarDNet39_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet39_ds_pretrained.pdparams',
'HarDNet68_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_ds_pretrained.pdparams',
'HarDNet68':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_pretrained.pdparams',
'HarDNet85':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet85_pretrained.pdparams'
}
def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('conv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels)),
('relu', nn.ReLU6())
)
return layer
def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('dwconv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels))
)
return layer
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
layer = nn.Sequential(
('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
)
return layer
class HarDBlock(nn.Layer):
def __init__(self, in_channels, growth_rate, grmul, n_layers,
keepBase=False, residual_out=False, dwconv=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0 # if upsample else in_channels
for i in range(n_layers):
outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
self.links.append(link)
if dwconv:
layers_.append(CombConvLayer(inch, outch))
else:
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
# print("Blk out =",self.out_channels)
self.layers = nn.LayerList(layers_)
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2 ** i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = paddle.concat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1):
out_.append(layers_[i])
out = paddle.concat(out_, 1)
return out
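# Note on the connectivity: get_link() gives each layer skip connections to the
# layers 2^i steps behind it and widens the output by grmul for each extra
# power of two. For example, with base_ch=64, growth_rate=14, grmul=1.7
# (the first block of HarDNet68), get_link(4, 64, 14, 1.7) returns
# (40, 102, [3, 2, 0]): layer 4 reads 14 + 24 + 64 = 102 input channels from
# layers 3, 2 and 0 and emits 40 channels.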
class HarDNet(nn.Layer):
def __init__(self, depth_wise=False, arch=85,
class_dim=1000, with_pool=True):
super().__init__()
first_ch = [32, 64]
second_kernel = 3
max_pool = True
grmul = 1.7
drop_rate = 0.1
# HarDNet68
ch_list = [128, 256, 320, 640, 1024]
gr = [14, 16, 20, 40, 160]
n_layers = [8, 16, 16, 16, 4]
downSamp = [1, 0, 1, 1, 0]
if arch == 85:
# HarDNet85
first_ch = [48, 96]
ch_list = [192, 256, 320, 480, 720, 1280]
gr = [24, 24, 28, 36, 48, 256]
n_layers = [8, 16, 16, 16, 16, 4]
downSamp = [1, 0, 1, 0, 1, 0]
drop_rate = 0.2
elif arch == 39:
# HarDNet39
first_ch = [24, 48]
ch_list = [96, 320, 640, 1024]
grmul = 1.6
gr = [16, 20, 64, 160]
n_layers = [4, 16, 8, 4]
downSamp = [1, 1, 1, 0]
if depth_wise:
second_kernel = 1
max_pool = False
drop_rate = 0.05
blks = len(n_layers)
self.base = nn.LayerList([])
# First Layer: Standard Conv3x3, Stride=2
self.base.append(
ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
stride=2, bias_attr=False))
# Second Layer
self.base.append(
ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))
# Maxpooling or DWConv3x3 downsampling
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
else:
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
# Build all HarDNet blocks
ch = first_ch[1]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
ch = blk.out_channels
self.base.append(blk)
if i == blks-1 and arch == 85:
self.base.append(nn.Dropout(0.1))
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
ch = ch_list[i]
if downSamp[i] == 1:
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
else:
self.base.append(DWConvLayer(ch, ch, stride=2))
ch = ch_list[blks-1]
layers = []
if with_pool:
layers.append(nn.AdaptiveAvgPool2D((1, 1)))
if class_dim > 0:
layers.append(nn.Flatten())
layers.append(nn.Dropout(drop_rate))
layers.append(nn.Linear(ch, class_dim))
self.base.append(nn.Sequential(*layers))
def forward(self, x):
for layer in self.base:
x = layer(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HarDNet39_ds(pretrained=False, **kwargs):
model = HarDNet(arch=39, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
return model
def HarDNet68_ds(pretrained=False, **kwargs):
model = HarDNet(arch=68, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
return model
def HarDNet68(pretrained=False, **kwargs):
model = HarDNet(arch=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
return model
def HarDNet85(pretrained=False, **kwargs):
model = HarDNet(arch=85, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
return model
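# Minimal usage sketch for the HarDNet factories, assuming a 224x224 input and
# the default class_dim=1000 (head: AdaptiveAvgPool2D -> Flatten -> Dropout -> Linear):
if __name__ == "__main__":
    net = HarDNet68(pretrained=False)
    out = net(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 1000]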
\ No newline at end of file
import paddle
import paddle.nn as nn
from paddle.vision.models import resnet
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"RedNet26":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet26_pretrained.pdparams",
"RedNet38":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet38_pretrained.pdparams",
"RedNet50":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet50_pretrained.pdparams",
"RedNet101":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet101_pretrained.pdparams",
"RedNet152":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet152_pretrained.pdparams"
}
class Involution(nn.Layer):
def __init__(self, channels, kernel_size, stride):
super(Involution, self).__init__()
self.kernel_size = kernel_size
self.stride = stride
self.channels = channels
reduction_ratio = 4
self.group_channels = 16
self.groups = self.channels // self.group_channels
self.conv1 = nn.Sequential(
('conv', nn.Conv2D(
in_channels=channels,
out_channels=channels // reduction_ratio,
kernel_size=1,
bias_attr=False
)),
('bn', nn.BatchNorm2D(channels // reduction_ratio)),
('activate', nn.ReLU())
)
self.conv2 = nn.Sequential(
('conv', nn.Conv2D(
in_channels=channels // reduction_ratio,
out_channels=kernel_size**2 * self.groups,
kernel_size=1,
stride=1
))
)
if stride > 1:
self.avgpool = nn.AvgPool2D(stride, stride)
def forward(self, x):
weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
b, c, h, w = weight.shape
weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size-1)//2, 1)
out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w))
out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
return out
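# Shape sketch of the involution: the kernel is predicted from the input
# itself. For x of shape (b, 64, 56, 56) with kernel_size=7, stride=1 (as in
# RedNet's layer1):
#   conv1 -> (b, 16, 56, 56); conv2 -> (b, 4*49, 56, 56) since groups = 64 // 16 = 4
#   weight -> (b, 4, 1, 49, 56, 56); unfold(x) -> reshaped to (b, 4, 16, 49, 56, 56)
#   (weight * out).sum(axis=3) -> reshape -> (b, 64, 56, 56)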
class BottleneckBlock(resnet.BottleneckBlock):
def __init__(self, inplanes, planes, stride=1, downsample=None,
groups=1, base_width=64, dilation=1, norm_layer=None):
super(BottleneckBlock, self).__init__(
inplanes, planes, stride, downsample,
groups, base_width, dilation, norm_layer
)
width = int(planes * (base_width / 64.)) * groups
self.conv2 = Involution(width, 7, stride)
class RedNet(resnet.ResNet):
def __init__(self, block, depth, class_dim=1000, with_pool=True):
super(RedNet, self).__init__(
block=block, depth=50,
num_classes=class_dim, with_pool=with_pool
)
layer_cfg = {
26: [1, 2, 4, 1],
38: [2, 3, 5, 2],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3]
}
layers = layer_cfg[depth]
self.conv1 = None
self.bn1 = None
self.relu = None
self.inplanes = 64
self.class_dim = class_dim
self.stem = nn.Sequential(
nn.Sequential(
('conv', nn.Conv2D(
in_channels=3,
out_channels=self.inplanes // 2,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False
)),
('bn', nn.BatchNorm2D(self.inplanes // 2)),
('activate', nn.ReLU())
),
Involution(self.inplanes // 2, 3, 1),
nn.BatchNorm2D(self.inplanes // 2),
nn.ReLU(),
nn.Sequential(
('conv', nn.Conv2D(
in_channels=self.inplanes // 2,
out_channels=self.inplanes,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False
)),
('bn', nn.BatchNorm2D(self.inplanes)),
('activate', nn.ReLU())
)
)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
def forward(self, x):
x = self.stem(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
if self.with_pool:
x = self.avgpool(x)
if self.class_dim > 0:
x = paddle.flatten(x, 1)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def RedNet26(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 26, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
return model
def RedNet38(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 38, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
return model
def RedNet50(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 50, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
return model
def RedNet101(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 101, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
return model
def RedNet152(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 152, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
return model
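# Minimal usage sketch for the RedNet factories: RedNet reuses paddle.vision's
# ResNet scaffolding but swaps the stem and each bottleneck's 3x3 conv for
# Involution layers, so the inherited Linear head still yields [N, class_dim].
if __name__ == "__main__":
    net = RedNet50(pretrained=False)
    out = net(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 1000]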
import math
import numpy as np
import paddle
import paddle.nn as nn
from paddle.nn.initializer import TruncatedNormal, Constant
from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"TNT_small":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/TNT_small_pretrained.pdparams"
}
trunc_normal_ = TruncatedNormal(std=.02)
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
def drop_path(x, drop_prob=0., training=False):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
"""
if drop_prob == 0. or not training:
return x
keep_prob = paddle.to_tensor(1 - drop_prob)
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
random_tensor = paddle.floor(random_tensor) # binarize
output = x.divide(keep_prob) * random_tensor
return output
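# Note: dividing by keep_prob before masking keeps the expected value of the
# output equal to the input, so no extra rescaling is needed at inference time
# when drop_path is a no-op.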
class DropPath(nn.Layer):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
"""
def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
class Mlp(nn.Layer):
def __init__(self, in_features, hidden_features=None,
out_features=None, act_layer=nn.GELU, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class Attention(nn.Layer):
def __init__(self, dim, hidden_dim, num_heads=8,
qkv_bias=False, attn_drop=0., proj_drop=0.):
super().__init__()
self.hidden_dim = hidden_dim
self.num_heads = num_heads
head_dim = hidden_dim // num_heads
self.head_dim = head_dim
self.scale = head_dim ** -0.5
self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x):
B, N, C = x.shape
qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))
q, k = qk[0], qk[1]
v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))
attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
attn = nn.functional.softmax(attn, axis=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
x = self.proj(x)
x = self.proj_drop(x)
return x
class Block(nn.Layer):
def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
norm_layer=nn.LayerNorm):
super().__init__()
# Inner transformer
self.norm_in = norm_layer(in_dim)
self.attn_in = Attention(
in_dim, in_dim, num_heads=in_num_head,
qkv_bias=qkv_bias, attn_drop=attn_drop,
proj_drop=drop
)
self.norm_mlp_in = norm_layer(in_dim)
self.mlp_in = Mlp(
in_features=in_dim, hidden_features=int(in_dim * 4),
out_features=in_dim, act_layer=act_layer, drop=drop
)
self.norm1_proj = norm_layer(in_dim)
self.proj = nn.Linear(in_dim * num_pixel, dim)
# Outer transformer
self.norm_out = norm_layer(dim)
self.attn_out = Attention(
dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
attn_drop=attn_drop, proj_drop=drop
)
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
self.norm_mlp = norm_layer(dim)
self.mlp = Mlp(
in_features=dim, hidden_features=int(dim * mlp_ratio),
out_features=dim, act_layer=act_layer, drop=drop
)
def forward(self, pixel_embed, patch_embed):
# inner
pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
# outer
B, N, C = patch_embed.shape
patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
return pixel_embed, patch_embed
class PixelEmbed(nn.Layer):
def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
super().__init__()
num_patches = (img_size // patch_size) ** 2
self.img_size = img_size
self.num_patches = num_patches
self.in_dim = in_dim
new_patch_size = math.ceil(patch_size / stride)
self.new_patch_size = new_patch_size
self.proj = nn.Conv2D(
in_chans, self.in_dim,
kernel_size=7, padding=3,
stride=stride
)
def forward(self, x, pixel_pos):
B, C, H, W = x.shape
assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."
x = self.proj(x)
x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
x = x + pixel_pos
x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
return x
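# Shape sketch for the TNT_small configuration (img_size=224, patch_size=16,
# in_dim=24, stride=4): proj(x) -> (B, 24, 56, 56), unfold -> (B, 24*4*4, 196),
# then per-patch pixel tokens of shape (B*196, 16, 24); the outer transformer
# later flattens each patch back to a single 16*24 = 384-dim patch embedding.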
class TNT(nn.Layer):
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12,
num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
super().__init__()
self.class_dim = class_dim
# num_features for consistency with other models
self.num_features = self.embed_dim = embed_dim
self.pixel_embed = PixelEmbed(
img_size=img_size, patch_size=patch_size,
in_chans=in_chans, in_dim=in_dim, stride=first_stride
)
num_patches = self.pixel_embed.num_patches
self.num_patches = num_patches
new_patch_size = self.pixel_embed.new_patch_size
num_pixel = new_patch_size ** 2
self.norm1_proj = norm_layer(num_pixel * in_dim)
self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
self.norm2_proj = norm_layer(embed_dim)
self.cls_token = self.create_parameter(
shape=(1, 1, embed_dim),
default_initializer=zeros_
)
self.add_parameter("cls_token", self.cls_token)
self.patch_pos = self.create_parameter(
shape=(1, num_patches + 1, embed_dim),
default_initializer=zeros_
)
self.add_parameter("patch_pos", self.patch_pos)
self.pixel_pos = self.create_parameter(
shape=(1, in_dim, new_patch_size, new_patch_size),
default_initializer=zeros_
)
self.add_parameter("pixel_pos", self.pixel_pos)
self.pos_drop = nn.Dropout(p=drop_rate)
# stochastic depth decay rule
dpr = np.linspace(0, drop_path_rate, depth)
blocks = []
for i in range(depth):
blocks.append(Block(
dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads,
in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
norm_layer=norm_layer
))
self.blocks = nn.LayerList(blocks)
self.norm = norm_layer(embed_dim)
if class_dim > 0:
self.head = nn.Linear(embed_dim, class_dim)
trunc_normal_(self.cls_token)
trunc_normal_(self.patch_pos)
trunc_normal_(self.pixel_pos)
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
def forward_features(self, x):
B = x.shape[0]
pixel_embed = self.pixel_embed(x, self.pixel_pos)
patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
patch_embed = patch_embed + self.patch_pos
patch_embed = self.pos_drop(patch_embed)
for blk in self.blocks:
pixel_embed, patch_embed = blk(pixel_embed, patch_embed)
patch_embed = self.norm(patch_embed)
return patch_embed[:, 0]
def forward(self, x):
x = self.forward_features(x)
if self.class_dim > 0:
x = self.head(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def TNT_small(pretrained=False, **kwargs):
model = TNT(
patch_size=16,
embed_dim=384,
in_dim=24,
depth=12,
num_heads=6,
in_num_head=4,
qkv_bias=False,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
return model
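# Minimal usage sketch for TNT_small, assuming a 224x224 input (PixelEmbed
# asserts the spatial size against img_size) and the default class_dim=1000:
if __name__ == "__main__":
    net = TNT_small(pretrained=False)
    out = net(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 1000]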