Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
525b5e1a
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
大约 1 年 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
525b5e1a
编写于
3月 31, 2021
作者:
H
huangxu96
提交者:
GitHub
3月 31, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support fp16 training for ResNeXt101_32x4d (#653)
上级
8a469799
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
129 addition
and
24 deletion
+129
-24
configs/ResNeXt/ResNeXt101_32x4d_fp16.yaml
configs/ResNeXt/ResNeXt101_32x4d_fp16.yaml
+91
-0
ppcls/modeling/architectures/resnext.py
ppcls/modeling/architectures/resnext.py
+38
-24
未找到文件。
configs/ResNeXt/ResNeXt101_32x4d_fp16.yaml
0 → 100644
浏览文件 @
525b5e1a
mode
:
'
train'
ARCHITECTURE
:
name
:
'
ResNeXt101_32x4d'
pretrained_model
:
"
"
model_save_dir
:
"
./output/"
classes_num
:
1000
total_images
:
1281167
save_interval
:
1
validate
:
True
valid_interval
:
1
epochs
:
120
topk
:
5
image_shape
:
[
4
,
224
,
224
]
use_dali
:
True
use_gpu
:
True
data_format
:
"
NCHW"
image_channel
:
&image_channel
4
image_shape
:
[
*image_channel
,
224
,
224
]
use_mix
:
False
ls_epsilon
:
-1
# mixed precision training
AMP
:
scale_loss
:
128.0
use_dynamic_loss_scaling
:
True
use_pure_fp16
:
&use_pure_fp16
True
LEARNING_RATE
:
function
:
'
Piecewise'
params
:
lr
:
0.1
decay_epochs
:
[
30
,
60
,
90
]
gamma
:
0.1
OPTIMIZER
:
function
:
'
Momentum'
params
:
momentum
:
0.9
multi_precision
:
*use_pure_fp16
regularizer
:
function
:
'
L2'
factor
:
0.000100
TRAIN
:
batch_size
:
256
num_workers
:
4
file_list
:
"
./dataset/ILSVRC2012/train_list.txt"
data_dir
:
"
./dataset/ILSVRC2012/"
shuffle_seed
:
0
transforms
:
-
DecodeImage
:
to_rgb
:
True
to_np
:
False
channel_first
:
False
-
RandCropImage
:
size
:
224
-
RandFlipImage
:
flip_code
:
1
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
'
output_fp16
:
*use_pure_fp16
channel_num
:
*image_channel
-
ToCHWImage
:
VALID
:
batch_size
:
64
num_workers
:
4
file_list
:
"
./dataset/ILSVRC2012/val_list.txt"
data_dir
:
"
./dataset/ILSVRC2012/"
shuffle_seed
:
0
transforms
:
-
DecodeImage
:
to_rgb
:
True
to_np
:
False
channel_first
:
False
-
ResizeImage
:
resize_short
:
256
-
CropImage
:
size
:
224
-
NormalizeImage
:
scale
:
1.0/255.0
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
'
-
ToCHWImage
:
ppcls/modeling/architectures/resnext.py
浏览文件 @
525b5e1a
...
@@ -41,9 +41,9 @@ class ConvBNLayer(nn.Layer):
...
@@ -41,9 +41,9 @@ class ConvBNLayer(nn.Layer):
stride
=
1
,
stride
=
1
,
groups
=
1
,
groups
=
1
,
act
=
None
,
act
=
None
,
name
=
None
):
name
=
None
,
data_format
=
"NCHW"
):
super
(
ConvBNLayer
,
self
).
__init__
()
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
_conv
=
Conv2D
(
self
.
_conv
=
Conv2D
(
in_channels
=
num_channels
,
in_channels
=
num_channels
,
out_channels
=
num_filters
,
out_channels
=
num_filters
,
...
@@ -52,7 +52,8 @@ class ConvBNLayer(nn.Layer):
...
@@ -52,7 +52,8 @@ class ConvBNLayer(nn.Layer):
padding
=
(
filter_size
-
1
)
//
2
,
padding
=
(
filter_size
-
1
)
//
2
,
groups
=
groups
,
groups
=
groups
,
weight_attr
=
ParamAttr
(
name
=
name
+
"_weights"
),
weight_attr
=
ParamAttr
(
name
=
name
+
"_weights"
),
bias_attr
=
False
)
bias_attr
=
False
,
data_format
=
data_format
)
if
name
==
"conv1"
:
if
name
==
"conv1"
:
bn_name
=
"bn_"
+
name
bn_name
=
"bn_"
+
name
else
:
else
:
...
@@ -63,7 +64,8 @@ class ConvBNLayer(nn.Layer):
...
@@ -63,7 +64,8 @@ class ConvBNLayer(nn.Layer):
param_attr
=
ParamAttr
(
name
=
bn_name
+
'_scale'
),
param_attr
=
ParamAttr
(
name
=
bn_name
+
'_scale'
),
bias_attr
=
ParamAttr
(
bn_name
+
'_offset'
),
bias_attr
=
ParamAttr
(
bn_name
+
'_offset'
),
moving_mean_name
=
bn_name
+
'_mean'
,
moving_mean_name
=
bn_name
+
'_mean'
,
moving_variance_name
=
bn_name
+
'_variance'
)
moving_variance_name
=
bn_name
+
'_variance'
,
data_layout
=
data_format
)
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
y
=
self
.
_conv
(
inputs
)
y
=
self
.
_conv
(
inputs
)
...
@@ -78,15 +80,16 @@ class BottleneckBlock(nn.Layer):
...
@@ -78,15 +80,16 @@ class BottleneckBlock(nn.Layer):
stride
,
stride
,
cardinality
,
cardinality
,
shortcut
=
True
,
shortcut
=
True
,
name
=
None
):
name
=
None
,
data_format
=
"NCHW"
):
super
(
BottleneckBlock
,
self
).
__init__
()
super
(
BottleneckBlock
,
self
).
__init__
()
self
.
conv0
=
ConvBNLayer
(
self
.
conv0
=
ConvBNLayer
(
num_channels
=
num_channels
,
num_channels
=
num_channels
,
num_filters
=
num_filters
,
num_filters
=
num_filters
,
filter_size
=
1
,
filter_size
=
1
,
act
=
'relu'
,
act
=
'relu'
,
name
=
name
+
"_branch2a"
)
name
=
name
+
"_branch2a"
,
data_format
=
data_format
)
self
.
conv1
=
ConvBNLayer
(
self
.
conv1
=
ConvBNLayer
(
num_channels
=
num_filters
,
num_channels
=
num_filters
,
num_filters
=
num_filters
,
num_filters
=
num_filters
,
...
@@ -94,13 +97,15 @@ class BottleneckBlock(nn.Layer):
...
@@ -94,13 +97,15 @@ class BottleneckBlock(nn.Layer):
groups
=
cardinality
,
groups
=
cardinality
,
stride
=
stride
,
stride
=
stride
,
act
=
'relu'
,
act
=
'relu'
,
name
=
name
+
"_branch2b"
)
name
=
name
+
"_branch2b"
,
data_format
=
data_format
)
self
.
conv2
=
ConvBNLayer
(
self
.
conv2
=
ConvBNLayer
(
num_channels
=
num_filters
,
num_channels
=
num_filters
,
num_filters
=
num_filters
*
2
if
cardinality
==
32
else
num_filters
,
num_filters
=
num_filters
*
2
if
cardinality
==
32
else
num_filters
,
filter_size
=
1
,
filter_size
=
1
,
act
=
None
,
act
=
None
,
name
=
name
+
"_branch2c"
)
name
=
name
+
"_branch2c"
,
data_format
=
data_format
)
if
not
shortcut
:
if
not
shortcut
:
self
.
short
=
ConvBNLayer
(
self
.
short
=
ConvBNLayer
(
...
@@ -109,7 +114,8 @@ class BottleneckBlock(nn.Layer):
...
@@ -109,7 +114,8 @@ class BottleneckBlock(nn.Layer):
if
cardinality
==
32
else
num_filters
,
if
cardinality
==
32
else
num_filters
,
filter_size
=
1
,
filter_size
=
1
,
stride
=
stride
,
stride
=
stride
,
name
=
name
+
"_branch1"
)
name
=
name
+
"_branch1"
,
data_format
=
data_format
)
self
.
shortcut
=
shortcut
self
.
shortcut
=
shortcut
...
@@ -129,10 +135,12 @@ class BottleneckBlock(nn.Layer):
...
@@ -129,10 +135,12 @@ class BottleneckBlock(nn.Layer):
class
ResNeXt
(
nn
.
Layer
):
class
ResNeXt
(
nn
.
Layer
):
def
__init__
(
self
,
layers
=
50
,
class_dim
=
1000
,
cardinality
=
32
):
def
__init__
(
self
,
layers
=
50
,
class_dim
=
1000
,
cardinality
=
32
,
input_image_channel
=
3
,
data_format
=
"NCHW"
):
super
(
ResNeXt
,
self
).
__init__
()
super
(
ResNeXt
,
self
).
__init__
()
self
.
layers
=
layers
self
.
layers
=
layers
self
.
data_format
=
data_format
self
.
input_image_channel
=
input_image_channel
self
.
cardinality
=
cardinality
self
.
cardinality
=
cardinality
supported_layers
=
[
50
,
101
,
152
]
supported_layers
=
[
50
,
101
,
152
]
assert
layers
in
supported_layers
,
\
assert
layers
in
supported_layers
,
\
...
@@ -153,13 +161,14 @@ class ResNeXt(nn.Layer):
...
@@ -153,13 +161,14 @@ class ResNeXt(nn.Layer):
1024
]
if
cardinality
==
32
else
[
256
,
512
,
1024
,
2048
]
1024
]
if
cardinality
==
32
else
[
256
,
512
,
1024
,
2048
]
self
.
conv
=
ConvBNLayer
(
self
.
conv
=
ConvBNLayer
(
num_channels
=
3
,
num_channels
=
self
.
input_image_channel
,
num_filters
=
64
,
num_filters
=
64
,
filter_size
=
7
,
filter_size
=
7
,
stride
=
2
,
stride
=
2
,
act
=
'relu'
,
act
=
'relu'
,
name
=
"res_conv1"
)
name
=
"res_conv1"
,
self
.
pool2d_max
=
MaxPool2D
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
)
data_format
=
self
.
data_format
)
self
.
pool2d_max
=
MaxPool2D
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
data_format
=
self
.
data_format
)
self
.
block_list
=
[]
self
.
block_list
=
[]
for
block
in
range
(
len
(
depth
)):
for
block
in
range
(
len
(
depth
)):
...
@@ -181,11 +190,12 @@ class ResNeXt(nn.Layer):
...
@@ -181,11 +190,12 @@ class ResNeXt(nn.Layer):
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
cardinality
=
self
.
cardinality
,
cardinality
=
self
.
cardinality
,
shortcut
=
shortcut
,
shortcut
=
shortcut
,
name
=
conv_name
))
name
=
conv_name
,
data_format
=
self
.
data_format
))
self
.
block_list
.
append
(
bottleneck_block
)
self
.
block_list
.
append
(
bottleneck_block
)
shortcut
=
True
shortcut
=
True
self
.
pool2d_avg
=
AdaptiveAvgPool2D
(
1
)
self
.
pool2d_avg
=
AdaptiveAvgPool2D
(
1
,
data_format
=
self
.
data_format
)
self
.
pool2d_avg_channels
=
num_channels
[
-
1
]
*
2
self
.
pool2d_avg_channels
=
num_channels
[
-
1
]
*
2
...
@@ -199,6 +209,10 @@ class ResNeXt(nn.Layer):
...
@@ -199,6 +209,10 @@ class ResNeXt(nn.Layer):
bias_attr
=
ParamAttr
(
name
=
"fc_offset"
))
bias_attr
=
ParamAttr
(
name
=
"fc_offset"
))
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
with
paddle
.
static
.
amp
.
fp16_guard
():
if
self
.
data_format
==
"NHWC"
:
inputs
=
paddle
.
tensor
.
transpose
(
inputs
,
[
0
,
2
,
3
,
1
])
inputs
.
stop_gradient
=
True
y
=
self
.
conv
(
inputs
)
y
=
self
.
conv
(
inputs
)
y
=
self
.
pool2d_max
(
y
)
y
=
self
.
pool2d_max
(
y
)
for
block
in
self
.
block_list
:
for
block
in
self
.
block_list
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录