PaddlePaddle / PaddleClas
Commit a950ec42
Authored June 10, 2021 by jm_12138

add the codes of TNT, HarDNet, RedNet and DLA models

Parent: f4f09840
Showing 5 changed files with 1193 additions and 0 deletions (+1193 −0)
ppcls/arch/backbone/__init__.py            +4    −0
ppcls/arch/backbone/model_zoo/dla.py       +451  −0
ppcls/arch/backbone/model_zoo/hardnet.py   +248  −0
ppcls/arch/backbone/model_zoo/rednet.py    +189  −0
ppcls/arch/backbone/model_zoo/tnt.py       +301  −0
ppcls/arch/backbone/__init__.py

@@ -47,4 +47,8 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
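Note (not part of the diff): with the four added import lines, the new constructors are re-exported from ppcls.arch.backbone, so they can be built without reaching into model_zoo. A minimal sketch, assuming PaddlePaddle and this PaddleClas revision are installed:

import paddle
from ppcls.arch.backbone import DLA34, RedNet26, TNT_small, HarDNet68

model = DLA34(pretrained=False)      # build without downloading pretrained weights
x = paddle.randn([1, 3, 224, 224])   # dummy ImageNet-sized input
print(model(x).shape)                # expected: [1, 1000] with the default class_dim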
ppcls/arch/backbone/model_zoo/dla.py (new file, mode 100644)
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant

from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    "DLA34": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA34_pretrained.pdparams",
    "DLA46_c": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46_c_pretrained.pdparams",
    "DLA46x_c": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46x_c_pretrained.pdparams",
    "DLA60": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60_pretrained.pdparams",
    "DLA60x": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_pretrained.pdparams",
    "DLA60x_c": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_c_pretrained.pdparams",
    "DLA102": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102_pretrained.pdparams",
    "DLA102x": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x_pretrained.pdparams",
    "DLA102x2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x2_pretrained.pdparams",
    "DLA169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA169_pretrained.pdparams"
}

__all__ = MODEL_URLS.keys()

zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


class DlaBasic(nn.Layer):
    def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
        super(DlaBasic, self).__init__()
        self.conv1 = nn.Conv2D(
            inplanes, planes, kernel_size=3, stride=stride,
            padding=dilation, bias_attr=False, dilation=dilation)
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2D(
            planes, planes, kernel_size=3, stride=1,
            padding=dilation, bias_attr=False, dilation=dilation)
        self.bn2 = nn.BatchNorm2D(planes)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)

        return out


class DlaBottleneck(nn.Layer):
    expansion = 2

    def __init__(self, inplanes, outplanes, stride=1,
                 dilation=1, cardinality=1, base_width=64):
        super(DlaBottleneck, self).__init__()
        self.stride = stride
        mid_planes = int(math.floor(outplanes * (base_width / 64)) * cardinality)
        mid_planes = mid_planes // self.expansion

        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm2D(mid_planes)
        self.conv2 = nn.Conv2D(
            mid_planes, mid_planes, kernel_size=3, stride=stride,
            padding=dilation, bias_attr=False, dilation=dilation, groups=cardinality)
        self.bn2 = nn.BatchNorm2D(mid_planes)
        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(outplanes)
        self.relu = nn.ReLU()

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += residual
        out = self.relu(out)

        return out


class DlaRoot(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(DlaRoot, self).__init__()
        self.conv = nn.Conv2D(
            in_channels, out_channels, 1, stride=1,
            bias_attr=False, padding=(kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        self.residual = residual

    def forward(self, *x):
        children = x
        x = self.conv(paddle.concat(x, 1))
        x = self.bn(x)
        if self.residual:
            x += children[0]
        x = self.relu(x)

        return x


class DlaTree(nn.Layer):
    def __init__(self, levels, block, in_channels, out_channels, stride=1,
                 dilation=1, cardinality=1, base_width=64, level_root=False,
                 root_dim=0, root_kernel_size=1, root_residual=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels

        self.downsample = nn.MaxPool2D(
            stride, stride=stride) if stride > 1 else Identity()
        self.project = Identity()
        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)

        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
            if in_channels != out_channels:
                self.project = nn.Sequential(
                    nn.Conv2D(in_channels, out_channels, kernel_size=1,
                              stride=1, bias_attr=False),
                    nn.BatchNorm2D(out_channels))
        else:
            cargs.update(dict(root_kernel_size=root_kernel_size,
                              root_residual=root_residual))
            self.tree1 = DlaTree(levels - 1, block, in_channels, out_channels,
                                 stride, root_dim=0, **cargs)
            self.tree2 = DlaTree(levels - 1, block, out_channels, out_channels,
                                 root_dim=root_dim + out_channels, **cargs)

        if levels == 1:
            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)

        self.level_root = level_root
        self.root_dim = root_dim
        self.levels = levels

    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        bottom = self.downsample(x)
        residual = self.project(bottom)
        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)
        if self.levels == 1:
            x2 = self.tree2(x1)
            x = self.root(x2, x1, *children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x


class DLA(nn.Layer):
    def __init__(self, levels, channels, in_chans=3, cardinality=1,
                 base_width=64, block=DlaBottleneck, residual_root=False,
                 drop_rate=0.0, class_dim=1000, with_pool=True):
        super(DLA, self).__init__()
        self.channels = channels
        self.class_dim = class_dim
        self.with_pool = with_pool
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate

        self.base_layer = nn.Sequential(
            nn.Conv2D(in_chans, channels[0], kernel_size=7,
                      stride=1, padding=3, bias_attr=False),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU())

        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)

        cargs = dict(cardinality=cardinality, base_width=base_width,
                     root_residual=residual_root)

        self.level2 = DlaTree(levels[2], block, channels[1], channels[2], 2,
                              level_root=False, **cargs)
        self.level3 = DlaTree(levels[3], block, channels[2], channels[3], 2,
                              level_root=True, **cargs)
        self.level4 = DlaTree(levels[4], block, channels[3], channels[4], 2,
                              level_root=True, **cargs)
        self.level5 = DlaTree(levels[5], block, channels[4], channels[5], 2,
                              level_root=True, **cargs)

        self.feature_info = [
            # rare to have a meaningful stride 1 level
            dict(num_chs=channels[0], reduction=1, module='level0'),
            dict(num_chs=channels[1], reduction=2, module='level1'),
            dict(num_chs=channels[2], reduction=4, module='level2'),
            dict(num_chs=channels[3], reduction=8, module='level3'),
            dict(num_chs=channels[4], reduction=16, module='level4'),
            dict(num_chs=channels[5], reduction=32, module='level5'),
        ]

        self.num_features = channels[-1]

        if with_pool:
            self.global_pool = nn.AdaptiveAvgPool2D(1)

        if class_dim > 0:
            self.fc = nn.Conv2D(self.num_features, class_dim, 1)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
                normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                ones_(m.weight)
                zeros_(m.bias)

    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2D(inplanes, planes, kernel_size=3,
                          stride=stride if i == 0 else 1,
                          padding=dilation, bias_attr=False, dilation=dilation),
                nn.BatchNorm2D(planes),
                nn.ReLU()])
            inplanes = planes
        return nn.Sequential(*modules)

    def forward_features(self, x):
        x = self.base_layer(x)
        x = self.level0(x)
        x = self.level1(x)
        x = self.level2(x)
        x = self.level3(x)
        x = self.level4(x)
        x = self.level5(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)

        if self.with_pool:
            x = self.global_pool(x)

        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)

        if self.class_dim > 0:
            x = self.fc(x)
            x = x.flatten(1)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def DLA34(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 2, 1),
                channels=(16, 32, 64, 128, 256, 512),
                block=DlaBasic, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
    return model


def DLA46_c(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 2, 1),
                channels=(16, 32, 64, 64, 128, 256),
                block=DlaBottleneck, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
    return model


def DLA46x_c(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 2, 1),
                channels=(16, 32, 64, 64, 128, 256),
                block=DlaBottleneck, cardinality=32, base_width=4, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
    return model


def DLA60(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 3, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
    return model


def DLA60x(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 3, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, cardinality=32, base_width=4, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
    return model


def DLA60x_c(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 2, 3, 1),
                channels=(16, 32, 64, 64, 128, 256),
                block=DlaBottleneck, cardinality=32, base_width=4, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
    return model


def DLA102(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 3, 4, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, residual_root=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
    return model


def DLA102x(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 3, 4, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, cardinality=32, base_width=4,
                residual_root=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
    return model


def DLA102x2(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 1, 3, 4, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, cardinality=64, base_width=4,
                residual_root=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
    return model


def DLA169(pretrained=False, **kwargs):
    model = DLA(levels=(1, 1, 2, 3, 5, 1),
                channels=(16, 32, 128, 256, 512, 1024),
                block=DlaBottleneck, residual_root=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
    return model
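Quick smoke-test sketch for the DLA backbones above (not part of the commit). Per _load_pretrained, pretrained=True downloads the .pdparams from MODEL_URLS, a string is treated as a local weights path, and False skips loading:

import paddle
from ppcls.arch.backbone.model_zoo.dla import DLA46x_c

model = DLA46x_c(pretrained=False)         # class_dim defaults to 1000
model.eval()
with paddle.no_grad():
    logits = model(paddle.randn([2, 3, 224, 224]))
print(logits.shape)                         # expected: [2, 1000]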
ppcls/arch/backbone/model_zoo/hardnet.py (new file, mode 100644)
import paddle
import paddle.nn as nn

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    'HarDNet39_ds': 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet39_ds_pretrained.pdparams',
    'HarDNet68_ds': 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_ds_pretrained.pdparams',
    'HarDNet68': 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_pretrained.pdparams',
    'HarDNet85': 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet85_pretrained.pdparams'
}


def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    layer = nn.Sequential(
        ('conv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size, stride=stride,
            padding=kernel_size // 2, groups=1, bias_attr=bias_attr)),
        ('norm', nn.BatchNorm2D(out_channels)),
        ('relu', nn.ReLU6()))
    return layer


def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    layer = nn.Sequential(
        ('dwconv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size, stride=stride,
            padding=1, groups=out_channels, bias_attr=bias_attr)),
        ('norm', nn.BatchNorm2D(out_channels)))
    return layer


def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
    layer = nn.Sequential(
        ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
        ('layer2', DWConvLayer(out_channels, out_channels, stride=stride)))
    return layer


class HarDBlock(nn.Layer):
    def __init__(self, in_channels, growth_rate, grmul, n_layers,
                 keepBase=False, residual_out=False, dwconv=False):
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        layers_ = []
        self.out_channels = 0  # if upsample else in_channels
        for i in range(n_layers):
            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate, grmul)
            self.links.append(link)
            if dwconv:
                layers_.append(CombConvLayer(inch, outch))
            else:
                layers_.append(ConvLayer(inch, outch))

            if (i % 2 == 0) or (i == n_layers - 1):
                self.out_channels += outch
        # print("Blk out =",self.out_channels)
        self.layers = nn.LayerList(layers_)

    def get_link(self, layer, base_ch, growth_rate, grmul):
        if layer == 0:
            return base_ch, 0, []
        out_channels = growth_rate
        link = []
        for i in range(10):
            dv = 2 ** i
            if layer % dv == 0:
                k = layer - dv
                link.append(k)
                if i > 0:
                    out_channels *= grmul
        out_channels = int(int(out_channels + 1) / 2) * 2
        in_channels = 0
        for i in link:
            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
            in_channels += ch
        return out_channels, in_channels, link

    def forward(self, x):
        layers_ = [x]

        for layer in range(len(self.layers)):
            link = self.links[layer]
            tin = []
            for i in link:
                tin.append(layers_[i])
            if len(tin) > 1:
                x = paddle.concat(tin, 1)
            else:
                x = tin[0]
            out = self.layers[layer](x)
            layers_.append(out)

        t = len(layers_)
        out_ = []
        for i in range(t):
            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
                out_.append(layers_[i])
        out = paddle.concat(out_, 1)

        return out


class HarDNet(nn.Layer):
    def __init__(self, depth_wise=False, arch=85, class_dim=1000, with_pool=True):
        super().__init__()
        first_ch = [32, 64]
        second_kernel = 3
        max_pool = True
        grmul = 1.7
        drop_rate = 0.1

        # HarDNet68
        ch_list = [128, 256, 320, 640, 1024]
        gr = [14, 16, 20, 40, 160]
        n_layers = [8, 16, 16, 16, 4]
        downSamp = [1, 0, 1, 1, 0]

        if arch == 85:
            # HarDNet85
            first_ch = [48, 96]
            ch_list = [192, 256, 320, 480, 720, 1280]
            gr = [24, 24, 28, 36, 48, 256]
            n_layers = [8, 16, 16, 16, 16, 4]
            downSamp = [1, 0, 1, 0, 1, 0]
            drop_rate = 0.2
        elif arch == 39:
            # HarDNet39
            first_ch = [24, 48]
            ch_list = [96, 320, 640, 1024]
            grmul = 1.6
            gr = [16, 20, 64, 160]
            n_layers = [4, 16, 8, 4]
            downSamp = [1, 1, 1, 0]

        if depth_wise:
            second_kernel = 1
            max_pool = False
            drop_rate = 0.05

        blks = len(n_layers)
        self.base = nn.LayerList([])

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
            ConvLayer(in_channels=3, out_channels=first_ch[0],
                      kernel_size=3, stride=2, bias_attr=False))

        # Second Layer
        self.base.append(ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))

        # Maxpooling or DWConv3x3 downsampling
        if max_pool:
            self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
        else:
            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))

        # Build all HarDNet blocks
        ch = first_ch[1]
        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
            ch = blk.out_channels
            self.base.append(blk)

            if i == blks - 1 and arch == 85:
                self.base.append(nn.Dropout(0.1))

            self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
            ch = ch_list[i]
            if downSamp[i] == 1:
                if max_pool:
                    self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
                else:
                    self.base.append(DWConvLayer(ch, ch, stride=2))

        ch = ch_list[blks - 1]

        layers = []

        if with_pool:
            layers.append(nn.AdaptiveAvgPool2D((1, 1)))

        if class_dim > 0:
            layers.append(nn.Flatten())
            layers.append(nn.Dropout(drop_rate))
            layers.append(nn.Linear(ch, class_dim))

        self.base.append(nn.Sequential(*layers))

    def forward(self, x):
        for layer in self.base:
            x = layer(x)
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def HarDNet39_ds(pretrained=False, **kwargs):
    model = HarDNet(arch=39, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
    return model


def HarDNet68_ds(pretrained=False, **kwargs):
    model = HarDNet(arch=68, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
    return model


def HarDNet68(pretrained=False, **kwargs):
    model = HarDNet(arch=68, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
    return model


def HarDNet85(pretrained=False, **kwargs):
    model = HarDNet(arch=85, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
    return model
\ No newline at end of file
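Usage sketch for the HarDNet variants above (an assumption-level example, not part of the diff): the "_ds" constructors set depth_wise=True, which swaps the plain ConvLayer blocks for CombConvLayer (pointwise + depth-wise) and replaces max-pool downsampling with strided depth-wise convs.

import paddle
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet39_ds

x = paddle.randn([1, 3, 224, 224])
for build in (HarDNet68, HarDNet39_ds):   # standard and depth-wise-separable variants
    model = build(pretrained=False)
    model.eval()
    print(build.__name__, model(x).shape)  # expected: [1, 1000] for both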
ppcls/arch/backbone/model_zoo/rednet.py (new file, mode 100644)
import paddle
import paddle.nn as nn

from paddle.vision.models import resnet

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    "RedNet26": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet26_pretrained.pdparams",
    "RedNet38": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet38_pretrained.pdparams",
    "RedNet50": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet50_pretrained.pdparams",
    "RedNet101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet101_pretrained.pdparams",
    "RedNet152": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet152_pretrained.pdparams"
}


class Involution(nn.Layer):
    def __init__(self, channels, kernel_size, stride):
        super(Involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 16
        self.groups = self.channels // self.group_channels
        self.conv1 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels,
                out_channels=channels // reduction_ratio,
                kernel_size=1,
                bias_attr=False)),
            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
            ('activate', nn.ReLU()))
        self.conv2 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels // reduction_ratio,
                out_channels=kernel_size**2 * self.groups,
                kernel_size=1,
                stride=1)))
        if stride > 1:
            self.avgpool = nn.AvgPool2D(stride, stride)

    def forward(self, x):
        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
        b, c, h, w = weight.shape
        weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
        out = nn.functional.unfold(x, self.kernel_size, self.stride,
                                   (self.kernel_size - 1) // 2, 1)
        out = out.reshape(
            (b, self.groups, self.group_channels, self.kernel_size**2, h, w))
        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
        return out


class BottleneckBlock(resnet.BottleneckBlock):
    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        super(BottleneckBlock, self).__init__(
            inplanes, planes, stride, downsample,
            groups, base_width, dilation, norm_layer)
        width = int(planes * (base_width / 64.)) * groups
        self.conv2 = Involution(width, 7, stride)


class RedNet(resnet.ResNet):
    def __init__(self, block, depth, class_dim=1000, with_pool=True):
        super(RedNet, self).__init__(
            block=block, depth=50, num_classes=class_dim, with_pool=with_pool)
        layer_cfg = {
            26: [1, 2, 4, 1],
            38: [2, 3, 5, 2],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3]
        }
        layers = layer_cfg[depth]

        self.conv1 = None
        self.bn1 = None
        self.relu = None
        self.inplanes = 64
        self.class_dim = class_dim

        self.stem = nn.Sequential(
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=3,
                    out_channels=self.inplanes // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    bias_attr=False)),
                ('bn', nn.BatchNorm2D(self.inplanes // 2)),
                ('activate', nn.ReLU())),
            Involution(self.inplanes // 2, 3, 1),
            nn.BatchNorm2D(self.inplanes // 2),
            nn.ReLU(),
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=self.inplanes // 2,
                    out_channels=self.inplanes,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias_attr=False)),
                ('bn', nn.BatchNorm2D(self.inplanes)),
                ('activate', nn.ReLU())))

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    def forward(self, x):
        x = self.stem(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.with_pool:
            x = self.avgpool(x)

        if self.class_dim > 0:
            x = paddle.flatten(x, 1)
            x = self.fc(x)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def RedNet26(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 26, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
    return model


def RedNet38(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 38, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
    return model


def RedNet50(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 50, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
    return model


def RedNet101(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 101, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
    return model


def RedNet152(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 152, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
    return model
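Sanity-check sketch for the code above (not part of the commit). The Involution layer predicts a per-position kernel from the (optionally pooled) input and applies it via unfold, so with stride=1 the output keeps the input shape; channels must be divisible by group_channels (16).

import paddle
from ppcls.arch.backbone.model_zoo.rednet import Involution, RedNet26

inv = Involution(channels=32, kernel_size=7, stride=1)
feat = paddle.randn([1, 32, 56, 56])
print(inv(feat).shape)                               # expected: [1, 32, 56, 56]

model = RedNet26(pretrained=False)                   # ResNet skeleton with involution conv2
model.eval()
print(model(paddle.randn([1, 3, 224, 224])).shape)   # expected: [1, 1000]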
ppcls/arch/backbone/model_zoo/tnt.py (new file, mode 100644)
import math

import numpy as np
import paddle
import paddle.nn as nn
from paddle.nn.initializer import TruncatedNormal, Constant

from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    "TNT_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/TNT_small_pretrained.pdparams"
}

trunc_normal_ = TruncatedNormal(std=.02)
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = paddle.to_tensor(1 - drop_prob)
    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    random_tensor = paddle.floor(random_tensor)  # binarize
    output = x.divide(keep_prob) * random_tensor
    return output


class DropPath(nn.Layer):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Mlp(nn.Layer):
    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Layer):
    def __init__(self, dim, hidden_dim, num_heads=8,
                 qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        head_dim = hidden_dim // num_heads
        self.head_dim = head_dim
        self.scale = head_dim ** -0.5

        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qk = self.qk(x).reshape(
            (B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))
        q, k = qk[0], qk[1]
        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))

        attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Layer):
    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4,
                 mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
            in_dim, in_dim, num_heads=in_num_head,
            qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)

        self.norm_mlp_in = norm_layer(in_dim)
        self.mlp_in = Mlp(in_features=in_dim, hidden_features=int(in_dim * 4),
                          out_features=in_dim, act_layer=act_layer, drop=drop)

        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim)

        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()

        self.norm_mlp = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
                       out_features=dim, act_layer=act_layer, drop=drop)

    def forward(self, pixel_embed, patch_embed):
        # inner
        pixel_embed = pixel_embed + self.drop_path(
            self.attn_in(self.norm_in(pixel_embed)))
        pixel_embed = pixel_embed + self.drop_path(
            self.mlp_in(self.norm_mlp_in(pixel_embed)))
        # outer
        B, N, C = patch_embed.shape
        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(
            self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
        patch_embed = patch_embed + self.drop_path(
            self.attn_out(self.norm_out(patch_embed)))
        patch_embed = patch_embed + self.drop_path(
            self.mlp(self.norm_mlp(patch_embed)))
        return pixel_embed, patch_embed


class PixelEmbed(nn.Layer):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2
        self.img_size = img_size
        self.num_patches = num_patches
        self.in_dim = in_dim
        new_patch_size = math.ceil(patch_size / stride)
        self.new_patch_size = new_patch_size
        self.proj = nn.Conv2D(in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride)

    def forward(self, x, pixel_pos):
        B, C, H, W = x.shape
        assert H == self.img_size and W == self.img_size, \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."
        x = self.proj(x)
        x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
        x = x.transpose((0, 2, 1)).reshape(
            (B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
        x = x + pixel_pos
        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
        return x


class TNT(nn.Layer):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768,
                 in_dim=48, depth=12, num_heads=12, in_num_head=4, mlp_ratio=4.,
                 qkv_bias=False, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.,
                 norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
        super().__init__()
        self.class_dim = class_dim
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.pixel_embed = PixelEmbed(
            img_size=img_size, patch_size=patch_size,
            in_chans=in_chans, in_dim=in_dim, stride=first_stride)
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
        num_pixel = new_patch_size ** 2

        self.norm1_proj = norm_layer(num_pixel * in_dim)
        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
        self.norm2_proj = norm_layer(embed_dim)

        self.cls_token = self.create_parameter(
            shape=(1, 1, embed_dim), default_initializer=zeros_)
        self.add_parameter("cls_token", self.cls_token)

        self.patch_pos = self.create_parameter(
            shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_)
        self.add_parameter("patch_pos", self.patch_pos)

        self.pixel_pos = self.create_parameter(
            shape=(1, in_dim, new_patch_size, new_patch_size),
            default_initializer=zeros_)
        self.add_parameter("pixel_pos", self.pixel_pos)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = np.linspace(0, drop_path_rate, depth)

        blocks = []
        for i in range(depth):
            blocks.append(Block(
                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel,
                num_heads=num_heads, in_num_head=in_num_head,
                mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate,
                attn_drop=attn_drop_rate, drop_path=dpr[i],
                norm_layer=norm_layer))
        self.blocks = nn.LayerList(blocks)
        self.norm = norm_layer(embed_dim)

        if class_dim > 0:
            self.head = nn.Linear(embed_dim, class_dim)

        trunc_normal_(self.cls_token)
        trunc_normal_(self.patch_pos)
        trunc_normal_(self.pixel_pos)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        B = x.shape[0]
        pixel_embed = self.pixel_embed(x, self.pixel_pos)

        patch_embed = self.norm2_proj(
            self.proj(self.norm1_proj(
                pixel_embed.reshape((B, self.num_patches, -1)))))
        patch_embed = paddle.concat(
            (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
        patch_embed = patch_embed + self.patch_pos
        patch_embed = self.pos_drop(patch_embed)

        for blk in self.blocks:
            pixel_embed, patch_embed = blk(pixel_embed, patch_embed)

        patch_embed = self.norm(patch_embed)
        return patch_embed[:, 0]

    def forward(self, x):
        x = self.forward_features(x)

        if self.class_dim > 0:
            x = self.head(x)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def TNT_small(pretrained=False, **kwargs):
    model = TNT(patch_size=16, embed_dim=384, in_dim=24, depth=12,
                num_heads=6, in_num_head=4, qkv_bias=False, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
    return model
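Usage sketch for TNT_small (not part of the commit). PixelEmbed asserts the input spatial size equals img_size (224 by default), so the dummy input below must be 224x224; the classification output is the head applied to the class token.

import paddle
from ppcls.arch.backbone.model_zoo.tnt import TNT_small

model = TNT_small(pretrained=False)   # embed_dim=384, 12 outer blocks, 6 outer heads
model.eval()
x = paddle.randn([1, 3, 224, 224])    # must match img_size or the assert in PixelEmbed fires
print(model(x).shape)                 # expected: [1, 1000]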