PaddlePaddle / PaddleClas, commit a53b6368

feat: support PVTV2

Authored by gaotingquan on Dec 13, 2021; committed by Tingquan Gao on Jan 07, 2022.
Parent commit: da7a1bf8

9 changed files with 1,614 additions and 0 deletions (+1614, -0).
Changed files:

ppcls/arch/backbone/__init__.py                      +1    -0
ppcls/arch/backbone/model_zoo/pvt_v2.py              +483  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b0.yaml          +161  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b1.yaml          +161  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b2.yaml          +161  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b2_linear.yaml   +161  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b3.yaml          +162  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b4.yaml          +162  -0
ppcls/configs/ImageNet/PVTV2/pvt_v2_b5.yaml          +162  -0
ppcls/arch/backbone/__init__.py

@@ -63,6 +63,7 @@ from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
 from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
 from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
 from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh
+from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5

 # help whl get all the models' api (class type) and components' api (func type)
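Because the new backbones are re-exported from ppcls/arch/backbone/__init__.py, they become reachable from the package-level API. A minimal sketch (assuming PaddlePaddle is installed and the repository root is on PYTHONPATH; passing pretrained=True would fetch the weights listed in MODEL_URLS in pvt_v2.py below):

    import paddle
    from ppcls.arch.backbone import PVT_V2_B0

    model = PVT_V2_B0(pretrained=False)   # build without downloading weights
    x = paddle.randn([1, 3, 224, 224])    # dummy ImageNet-sized input
    logits = model(x)                     # expected shape: [1, 1000]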
ppcls/arch/backbone/model_zoo/pvt_v2.py (new file, mode 100644)

# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Code was heavily based on https://github.com/whai362/PVT

from functools import partial
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant

from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity, drop_path

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    "PVT_V2_B0":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B0_pretrained.pdparams",
    "PVT_V2_B1":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B1_pretrained.pdparams",
    "PVT_V2_B2":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_pretrained.pdparams",
    "PVT_V2_B2_Linear":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_Linear_pretrained.pdparams",
    "PVT_V2_B3":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B3_pretrained.pdparams",
    "PVT_V2_B4":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams",
    "PVT_V2_B5":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams",
}

__all__ = list(MODEL_URLS.keys())


def swapdim(x, dim1, dim2):
    # Swap two axes of a tensor, e.g. swapdim(x, -2, -1) transposes the last two dims.
    a = list(range(len(x.shape)))
    a[dim1], a[dim2] = a[dim2], a[dim1]
    return x.transpose(a)


class Mlp(nn.Layer):
    # Feed-forward block with a depthwise 3x3 conv (DWConv) between fc1 and the activation.
    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.GELU,
                 drop=0.,
                 linear=False):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)
        self.linear = linear
        if self.linear:
            self.relu = nn.ReLU()

    def forward(self, x, H, W):
        x = self.fc1(x)
        if self.linear:
            x = self.relu(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Layer):
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop=0.,
                 proj_drop=0.,
                 sr_ratio=1,
                 linear=False):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.linear = linear
        self.sr_ratio = sr_ratio
        if not linear:
            # Spatial-reduction attention (SRA): K/V are downsampled with a strided conv.
            if sr_ratio > 1:
                self.sr = nn.Conv2D(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
                self.norm = nn.LayerNorm(dim)
        else:
            # Linear SRA: K/V are pooled to a fixed 7x7 map instead of using a strided conv.
            self.pool = nn.AdaptiveAvgPool2D(7)
            self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1)
            self.norm = nn.LayerNorm(dim)
            self.act = nn.GELU()

    def forward(self, x, H, W):
        B, N, C = x.shape
        q = self.q(x).reshape(
            [B, N, self.num_heads, C // self.num_heads]).transpose([0, 2, 1, 3])

        if not self.linear:
            if self.sr_ratio > 1:
                x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
                x_ = self.sr(x_).reshape([B, C, -1]).transpose([0, 2, 1])
                x_ = self.norm(x_)
                kv = self.kv(x_).reshape(
                    [B, -1, 2, self.num_heads,
                     C // self.num_heads]).transpose([2, 0, 3, 1, 4])
            else:
                kv = self.kv(x).reshape(
                    [B, -1, 2, self.num_heads,
                     C // self.num_heads]).transpose([2, 0, 3, 1, 4])
        else:
            x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
            x_ = self.sr(self.pool(x_)).reshape([B, C, -1]).transpose([0, 2, 1])
            x_ = self.norm(x_)
            x_ = self.act(x_)
            kv = self.kv(x_).reshape(
                [B, -1, 2, self.num_heads,
                 C // self.num_heads]).transpose([2, 0, 3, 1, 4])
        k, v = kv[0], kv[1]

        attn = (q @ swapdim(k, -2, -1)) * self.scale
        attn = F.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        x = swapdim((attn @ v), 1, 2).reshape([B, N, C])
        x = self.proj(x)
        x = self.proj_drop(x)

        return x


class Block(nn.Layer):
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 sr_ratio=1,
                 linear=False):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            sr_ratio=sr_ratio,
            linear=linear)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim,
                       hidden_features=mlp_hidden_dim,
                       act_layer=act_layer,
                       drop=drop,
                       linear=linear)

    def forward(self, x, H, W):
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
        return x


class OverlapPatchEmbed(nn.Layer):
    """ Image to Patch Embedding
    """

    def __init__(self,
                 img_size=224,
                 patch_size=7,
                 stride=4,
                 in_chans=3,
                 embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2D(
            in_chans,
            embed_dim,
            kernel_size=patch_size,
            stride=stride,
            padding=(patch_size[0] // 2, patch_size[1] // 2))
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        x = self.proj(x)
        _, _, H, W = x.shape
        x = x.flatten(2)
        x = swapdim(x, 1, 2)
        x = self.norm(x)

        return x, H, W


class PyramidVisionTransformerV2(nn.Layer):
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 class_num=1000,
                 embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8],
                 mlp_ratios=[4, 4, 4, 4],
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_layer=nn.LayerNorm,
                 depths=[3, 4, 6, 3],
                 sr_ratios=[8, 4, 2, 1],
                 num_stages=4,
                 linear=False):
        super().__init__()
        self.class_num = class_num
        self.depths = depths
        self.num_stages = num_stages

        dpr = [
            x for x in paddle.linspace(0, drop_path_rate, sum(depths))
        ]  # stochastic depth decay rule
        cur = 0

        for i in range(num_stages):
            patch_embed = OverlapPatchEmbed(
                img_size=img_size if i == 0 else img_size // (2**(i + 1)),
                patch_size=7 if i == 0 else 3,
                stride=4 if i == 0 else 2,
                in_chans=in_chans if i == 0 else embed_dims[i - 1],
                embed_dim=embed_dims[i])

            block = nn.LayerList([
                Block(
                    dim=embed_dims[i],
                    num_heads=num_heads[i],
                    mlp_ratio=mlp_ratios[i],
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[cur + j],
                    norm_layer=norm_layer,
                    sr_ratio=sr_ratios[i],
                    linear=linear) for j in range(depths[i])
            ])
            norm = norm_layer(embed_dims[i])
            cur += depths[i]

            setattr(self, f"patch_embed{i + 1}", patch_embed)
            setattr(self, f"block{i + 1}", block)
            setattr(self, f"norm{i + 1}", norm)

        # classification head
        self.head = nn.Linear(embed_dims[3], class_num) if class_num > 0 else Identity()

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        B = x.shape[0]

        for i in range(self.num_stages):
            patch_embed = getattr(self, f"patch_embed{i + 1}")
            block = getattr(self, f"block{i + 1}")
            norm = getattr(self, f"norm{i + 1}")
            x, H, W = patch_embed(x)
            for blk in block:
                x = blk(x, H, W)
            x = norm(x)
            if i != self.num_stages - 1:
                # Restore the (B, C, H, W) feature map before the next patch embedding.
                x = x.reshape([B, H, W, -1]).transpose([0, 3, 1, 2])

        return x.mean(axis=1)

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)
        return x


class DWConv(nn.Layer):
    # 3x3 depthwise convolution applied on the (B, C, H, W) view of the token sequence.
    def __init__(self, dim=768):
        super().__init__()
        self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        x = swapdim(x, 1, 2)
        x = x.reshape([B, C, H, W])
        x = self.dwconv(x)
        x = x.flatten(2)
        x = swapdim(x, 1, 2)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type.")


def PVT_V2_B0(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[32, 64, 160, 256],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[2, 2, 2, 2],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B0"], use_ssld=use_ssld)
    return model


def PVT_V2_B1(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[2, 2, 2, 2],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B1"], use_ssld=use_ssld)
    return model


def PVT_V2_B2(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[3, 4, 6, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B2"], use_ssld=use_ssld)
    return model


def PVT_V2_B3(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[3, 4, 18, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B3"], use_ssld=use_ssld)
    return model


def PVT_V2_B4(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[3, 8, 27, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B4"], use_ssld=use_ssld)
    return model


def PVT_V2_B5(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[4, 4, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[3, 6, 40, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B5"], use_ssld=use_ssld)
    return model


def PVT_V2_B2_Linear(pretrained=False, use_ssld=False, **kwargs):
    model = PyramidVisionTransformerV2(
        patch_size=4,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
        depths=[3, 4, 6, 3],
        sr_ratios=[8, 4, 2, 1],
        linear=True,
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["PVT_V2_B2_Linear"], use_ssld=use_ssld)
    return model
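Much of pvt_v2.py switches between the token layout (B, N, C) used by attention and the feature-map layout (B, C, H, W) used by the convolutions (DWConv, the sr reduction, and the inter-stage reshape in forward_features). A small sketch of that round trip with made-up shapes, using the same Paddle calls as the code above:

    import paddle

    B, H, W, C = 2, 7, 7, 64
    tokens = paddle.randn([B, H * W, C])                        # (B, N, C), N = H * W
    fmap = tokens.transpose([0, 2, 1]).reshape([B, C, H, W])    # tokens -> feature map
    back = fmap.flatten(2).transpose([0, 2, 1])                 # feature map -> tokens
    print(back.shape)                                           # [2, 49, 64]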
ppcls/configs/ImageNet/PVTV2/pvt_v2_b0.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B0
  class_num: 1000
  drop_path_rate: 0.1
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
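For reference, the NormalizeImage step in this config (and in the six configs that follow) amounts to the arithmetic below; this is only an illustrative sketch on a random array, not the ppcls operator itself:

    import numpy as np

    img = np.random.randint(0, 256, size=(224, 224, 3)).astype("float32")  # decoded HWC image
    mean = np.array([0.485, 0.456, 0.406], dtype="float32")
    std = np.array([0.229, 0.224, 0.225], dtype="float32")
    out = (img * (1.0 / 255.0) - mean) / std  # order: '' keeps HWC; ToCHWImage converts layout only in the Infer pipeline
    print(out.shape)                          # (224, 224, 3)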
ppcls/configs/ImageNet/PVTV2/pvt_v2_b1.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B1
  class_num: 1000
  drop_path_rate: 0.1
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
ppcls/configs/ImageNet/PVTV2/pvt_v2_b2.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B2
  class_num: 1000
  drop_path_rate: 0.1
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
ppcls/configs/ImageNet/PVTV2/pvt_v2_b2_linear.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B2_Linear
  class_num: 1000
  drop_path_rate: 0.1
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
ppcls/configs/ImageNet/PVTV2/pvt_v2_b3.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B3
  class_num: 1000
  drop_path_rate: 0.3
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  clip_grad: 1.0
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
ppcls/configs/ImageNet/PVTV2/pvt_v2_b4.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B4
  class_num: 1000
  drop_path_rate: 0.3
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  clip_grad: 1.0
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
ppcls/configs/ImageNet/PVTV2/pvt_v2_b5.yaml (new file, mode 100644)

# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  # training model under @to_static
  to_static: False

# model architecture
Arch:
  name: PVT_V2_B5
  class_num: 1000
  drop_path_rate: 0.3
  drop_rate: 0.0

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  clip_grad: 1.0
  no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 1e-3
    eta_min: 1e-5
    warmup_epoch: 20
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
      batch_transform_ops:
        - OpSampler:
            MixupOperator:
              alpha: 0.8
              prob: 0.5
            CutmixOperator:
              alpha: 1.0
              prob: 0.5
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 4
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/whl/demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
        interpolation: bicubic
        backend: pil
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    - ToCHWImage:

PostProcess:
  name: Topk
  topk: 5
  class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
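Taken together, the commit registers seven PVT-V2 backbones plus one ImageNet training recipe per backbone. A quick sketch for checking that every Arch.name used by the configs above resolves through the package API (assumes the repository layout of this commit, with the repository root on PYTHONPATH):

    from ppcls.arch import backbone

    names = ["PVT_V2_B0", "PVT_V2_B1", "PVT_V2_B2", "PVT_V2_B2_Linear",
             "PVT_V2_B3", "PVT_V2_B4", "PVT_V2_B5"]
    for name in names:
        assert hasattr(backbone, name), name   # each Arch.name should be an exported builder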