未验证 提交 8763d270 编写于 作者: P pangyoki 提交者: GitHub

Add AMP for ResNet200_vd model (#5070)

* add amp for ResNet200_vd

* change use_fp16 to use_amp

* fix script format

* change batch_size
上级 bec2933a
......@@ -15,7 +15,7 @@ import paddle
import paddle.fluid as fluid
import utils.utility as utility
AMP_MODEL_LIST = ["ResNet50", "SE_ResNet50_vd"]
AMP_MODEL_LIST = ["ResNet50", "SE_ResNet50_vd", "ResNet200_vd"]
def _calc_label_smoothing_loss(softmax_out, label, class_dim, epsilon):
......
......@@ -23,7 +23,8 @@ import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
__all__ = [
"ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"
"ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
"ResNet152_vd", "ResNet200_vd"
]
......@@ -32,7 +33,7 @@ class ResNet():
self.layers = layers
self.is_3x3 = is_3x3
def net(self, input, class_dim=1000):
def net(self, input, class_dim=1000, data_format="NCHW"):
is_3x3 = self.is_3x3
layers = self.layers
supported_layers = [18, 34, 50, 101, 152, 200]
......@@ -40,7 +41,7 @@ class ResNet():
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
......@@ -56,7 +57,8 @@ class ResNet():
num_filters=64,
filter_size=7,
stride=2,
act='relu')
act='relu',
data_format=data_format)
else:
conv = self.conv_bn_layer(
input=input,
......@@ -64,29 +66,33 @@ class ResNet():
filter_size=3,
stride=2,
act='relu',
name='conv1_1')
name='conv1_1',
data_format=data_format)
conv = self.conv_bn_layer(
input=conv,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name='conv1_2')
name='conv1_2',
data_format=data_format)
conv = self.conv_bn_layer(
input=conv,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name='conv1_3')
name='conv1_3',
data_format=data_format)
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_type='max',
data_format=data_format)
if layers >= 50:
for block in range(len(depth)):
for i in range(depth[block]):
......@@ -101,22 +107,29 @@ class ResNet():
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
if_first=block==i==0,
name=conv_name)
if_first=block == i == 0,
name=conv_name,
data_format=data_format)
else:
for block in range(len(depth)):
for i in range(depth[block]):
conv_name="res"+str(block+2)+chr(97+i)
conv_name = "res" + str(block + 2) + chr(97 + i)
conv = self.basic_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
if_first=block==i==0,
name=conv_name)
if_first=block == i == 0,
name=conv_name,
data_format=data_format)
pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
input=conv,
pool_type='avg',
global_pooling=True,
data_format=data_format)
pool_channel = pool.shape[1] if data_format == "NCHW" else pool.shape[
-1]
stdv = 1.0 / math.sqrt(pool_channel * 1.0)
out = fluid.layers.fc(
input=pool,
......@@ -133,7 +146,8 @@ class ResNet():
stride=1,
groups=1,
act=None,
name=None):
name=None,
data_format="NCHW"):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
......@@ -143,7 +157,8 @@ class ResNet():
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bias_attr=False,
data_format=data_format)
if name == "conv1":
bn_name = "bn_" + name
else:
......@@ -154,7 +169,8 @@ class ResNet():
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
moving_variance_name=bn_name + '_variance',
data_layout=data_format)
def conv_bn_layer_new(self,
input,
......@@ -163,14 +179,16 @@ class ResNet():
stride=1,
groups=1,
act=None,
name=None):
name=None,
data_format="NCHW"):
pool = fluid.layers.pool2d(
input=input,
pool_size=2,
pool_stride=2,
pool_padding=0,
pool_type='avg',
ceil_mode=True)
ceil_mode=True,
data_format=data_format)
conv = fluid.layers.conv2d(
input=pool,
......@@ -181,7 +199,8 @@ class ResNet():
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bias_attr=False,
data_format=data_format)
if name == "conv1":
bn_name = "bn_" + name
else:
......@@ -192,81 +211,114 @@ class ResNet():
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
moving_variance_name=bn_name + '_variance',
data_layout=data_format)
def shortcut(self, input, ch_out, stride, name, if_first=False):
ch_in = input.shape[1]
def shortcut(self,
input,
ch_out,
stride,
name,
if_first=False,
data_format="NCHW"):
ch_in = input.shape[1] if data_format == "NCHW" else input.shape[-1]
if ch_in != ch_out or stride != 1:
if if_first:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
return self.conv_bn_layer(
input,
ch_out,
1,
stride,
name=name,
data_format=data_format)
else:
return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
return self.conv_bn_layer_new(
input,
ch_out,
1,
stride,
name=name,
data_format=data_format)
elif if_first:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
return self.conv_bn_layer(
input, ch_out, 1, stride, name=name, data_format=data_format)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name, if_first):
def bottleneck_block(self,
input,
num_filters,
stride,
name,
if_first,
data_format="NCHW"):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
name=name + "_branch2a",
data_format=data_format)
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
name=name + "_branch2b",
data_format=data_format)
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
name=name + "_branch2c",
data_format=data_format)
short = self.shortcut(
input,
num_filters * 4,
stride,
if_first=if_first,
name=name + "_branch1")
name=name + "_branch1",
data_format=data_format)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def basic_block(self, input, num_filters, stride, name, if_first):
def basic_block(self, input, num_filters, stride, name, if_first,
data_format):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=3,
act='relu',
input=input,
num_filters=num_filters,
filter_size=3,
act='relu',
stride=stride,
name=name+"_branch2a")
name=name + "_branch2a",
data_format=data_format)
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
act=None,
name=name+"_branch2b")
input=conv0,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b",
data_format=data_format)
short = self.shortcut(
input,
num_filters,
stride,
if_first=if_first,
name=name + "_branch1")
input,
num_filters,
stride,
if_first=if_first,
name=name + "_branch1",
data_format=data_format)
return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
def ResNet18_vd():
model=ResNet(layers=18, is_3x3=True)
model = ResNet(layers=18, is_3x3=True)
return model
def ResNet34_vd():
model=ResNet(layers=34, is_3x3=True)
model = ResNet(layers=34, is_3x3=True)
return model
......
#!/bin/bash -ex
#Training details
export FLAGS_conv_workspace_size_limit=4000 #MB
export FLAGS_cudnn_exhaustive_search=1
export FLAGS_cudnn_batchnorm_spatial_persistent=1
DATA_DIR="Your image dataset path, e.g. ./data/ILSVRC2012/"
DATA_FORMAT="NHWC"
USE_AMP=true #whether to use amp
USE_DALI=true
USE_ADDTO=true
if ${USE_ADDTO} ;then
export FLAGS_max_inplace_grad_add=8
fi
if ${USE_DALI}; then
export FLAGS_fraction_of_gpu_memory_to_use=0.8
fi
python train.py \
--model=ResNet200_vd \
--data_dir=${DATA_DIR} \
--batch_size=64 \
--num_epochs=200 \
--total_images=1281167 \
--image_shape 4 224 224 \
--class_dim=1000 \
--print_step=10 \
--model_save_dir=output/ \
--lr_strategy=cosine_decay \
--use_amp=${USE_AMP} \
--scale_loss=128.0 \
--use_dynamic_loss_scaling=true \
--data_format=${DATA_FORMAT} \
--fuse_elewise_add_act_ops=true \
--fuse_bn_act_ops=true \
--fuse_bn_add_act_ops=true \
--enable_addto=${USE_ADDTO} \
--validate=true \
--is_profiler=false \
--profiler_path=profile/ \
--reader_thread=10 \
--reader_buf_size=4000 \
--use_dali=${USE_DALI} \
--lr=0.1 \
--l2_decay=1e-4 \
--use_label_smoothing=True \
--label_smoothing_epsilon=0.1
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册