Unverified · Commit b480de5d authored by Zhang Ting, committed by GitHub

Revert "add fuse_bn_add_act_ops args" (#4914)

* Revert "add fuse_bn_add_act_ops args (#4864)"
Parent 60d045d3
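Context for the diff below: PR #4864 threaded a `fuse_bn_add_act` flag through the ResNet blocks so that, under AMP training, the tail of each residual block could call `fluid.contrib.layers.fused_bn_add_act`, which computes relu(batch_norm(conv_out) + shortcut) in a single fused kernel. This commit reverts that plumbing and restores the plain `conv_bn_layer` + `elementwise_add(act='relu')` path. A minimal sketch of the two formulations the diff switches between; the helper names `residual_tail_*` and the `bn_name` argument are illustrative, not from the repo:

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

def residual_tail_unfused(conv_out, shortcut, bn_name):
    # Restored path: separate batch_norm, then add + relu in one elementwise op.
    bn = fluid.layers.batch_norm(
        input=conv_out,
        act=None,
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(name=bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')
    return fluid.layers.elementwise_add(x=shortcut, y=bn, act='relu')

def residual_tail_fused(conv_out, shortcut, bn_name):
    # Reverted path: one kernel for batch_norm + add + relu; per the deleted
    # comment in this diff, it only supports AMP training.
    return fluid.contrib.layers.fused_bn_add_act(
        conv_out,
        shortcut,
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(name=bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')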
@@ -39,14 +39,9 @@ def _basic_model(data, model, args, is_train):
         image_in = fluid.layers.transpose(
             image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
         image_in.stop_gradient = image.stop_gradient
-        # fuse_bn_add_act only supports amp training
-        fuse_bn_add_act=False
-        if is_train and args.fuse_bn_add_act_ops:
-            fuse_bn_add_act=True
         net_out = model.net(input=image_in,
                             class_dim=args.class_dim,
-                            data_format=args.data_format,
-                            fuse_bn_add_act=fuse_bn_add_act)
+                            data_format=args.data_format)
     else:
         net_out = model.net(input=image, class_dim=args.class_dim)
     softmax_out = fluid.layers.softmax(net_out, use_cudnn=False)
......
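The deleted guard above turned the fusion on only when `is_train and args.fuse_bn_add_act_ops`, since per the deleted comment the fused op supports AMP training only. For reference, AMP in this fluid-era codebase is enabled by decorating the optimizer; a hedged sketch with illustrative hyperparameters, not taken from this repo:

import paddle.fluid as fluid
from paddle.fluid.contrib import mixed_precision

# Wrap the optimizer so conv/matmul run in float16 with dynamic loss scaling.
optimizer = fluid.optimizer.Momentum(learning_rate=0.1, momentum=0.9)
optimizer = mixed_precision.decorate(
    optimizer, init_loss_scaling=128.0, use_dynamic_loss_scaling=True)
# optimizer.minimize(loss) then builds the AMP-rewritten training program.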
@@ -31,7 +31,7 @@ class ResNet():
     def __init__(self, layers=50):
         self.layers = layers

-    def net(self, input, class_dim=1000, data_format="NCHW", fuse_bn_add_act=False):
+    def net(self, input, class_dim=1000, data_format="NCHW"):
         layers = self.layers
         supported_layers = [18, 34, 50, 101, 152]
         assert layers in supported_layers, \
@@ -77,8 +77,7 @@ class ResNet():
                     num_filters=num_filters[block],
                     stride=2 if i == 0 and block != 0 else 1,
                     name=conv_name,
-                    data_format=data_format,
-                    fuse_bn_add_act=fuse_bn_add_act)
+                    data_format=data_format)

         pool = fluid.layers.pool2d(
             input=conv, pool_type='avg', global_pooling=True, data_format=data_format)
@@ -98,8 +97,7 @@ class ResNet():
                     stride=2 if i == 0 and block != 0 else 1,
                     is_first=block == i == 0,
                     name=conv_name,
-                    data_format=data_format,
-                    fuse_bn_add_act=fuse_bn_add_act)
+                    data_format=data_format)

         pool = fluid.layers.pool2d(
             input=conv, pool_type='avg', global_pooling=True, data_format=data_format)
@@ -157,7 +155,7 @@ class ResNet():
         else:
             return input

-    def bottleneck_block(self, input, num_filters, stride, name, data_format, fuse_bn_add_act):
+    def bottleneck_block(self, input, num_filters, stride, name, data_format):
         conv0 = self.conv_bn_layer(
             input=input,
             num_filters=num_filters,
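For orientation (standard ResNet facts, not stated in this diff): `bottleneck_block` builds the 50/101/152-layer variants and `basic_block` the 18/34-layer ones, stacked with the usual per-stage depths:

# Standard per-stage block counts for the supported_layers above.
depth_by_layers = {
    18: [2, 2, 2, 2],    # basic_block
    34: [3, 4, 6, 3],    # basic_block
    50: [3, 4, 6, 3],    # bottleneck_block
    101: [3, 4, 23, 3],  # bottleneck_block
    152: [3, 8, 36, 3],  # bottleneck_block
}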
@@ -173,56 +171,26 @@ class ResNet():
             act='relu',
             name=name + "_branch2b",
             data_format=data_format)

-        if not fuse_bn_add_act:
-            conv2 = self.conv_bn_layer(
-                input=conv1,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                act=None,
-                name=name + "_branch2c",
-                data_format=data_format)
-            short = self.shortcut(
-                input,
-                num_filters * 4,
-                stride,
-                is_first=False,
-                name=name + "_branch1",
-                data_format=data_format)
-            return fluid.layers.elementwise_add(
-                x=short, y=conv2, act='relu', name=name + ".add.output.5")
-        else:
-            conv2 = fluid.layers.conv2d(
-                input=conv1,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                act=None,
-                param_attr=ParamAttr(name=name + "_branch2c" + "_weights"),
-                bias_attr=False,
-                name=name + '_branch2c' + '.conv2d.output.1',
-                data_format=data_format)
-            short = self.shortcut(
-                input,
-                num_filters * 4,
-                stride,
-                is_first=False,
-                name=name + "_branch1",
-                data_format=data_format)
-            name = name + "_branch2c"
-            bn_name = "bn" + name[3:]
-            short = fluid.contrib.layers.fused_bn_add_act(
-                conv2,
-                short,
-                param_attr=ParamAttr(name=bn_name + '_scale'),
-                bias_attr=ParamAttr(bn_name + '_offset'),
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance',
-                name=name + ".add.output.5")
-        return short
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c",
+            data_format=data_format)
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            is_first=False,
+            name=name + "_branch1",
+            data_format=data_format)
+
+        return fluid.layers.elementwise_add(
+            x=short, y=conv2, act='relu', name=name + ".add.output.5")

-    def basic_block(self, input, num_filters, stride, is_first, name,
-                    data_format, fuse_bn_add_act):
+    def basic_block(self, input, num_filters, stride, is_first, name, data_format):
         conv0 = self.conv_bn_layer(
             input=input,
             num_filters=num_filters,
@@ -231,54 +199,16 @@ class ResNet():
             stride=stride,
             name=name + "_branch2a",
             data_format=data_format)

-        if not fuse_bn_add_act:
-            conv1 = self.conv_bn_layer(
-                input=conv0,
-                num_filters=num_filters,
-                filter_size=3,
-                act=None,
-                name=name + "_branch2b",
-                data_format=data_format)
-            short = self.shortcut(
-                input,
-                num_filters,
-                stride,
-                is_first,
-                name=name + "_branch1",
-                data_format=data_format)
-            return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
-        else:
-            conv1 = fluid.layers.conv2d(
-                input=conv0,
-                num_filters=num_filters,
-                filter_size=3,
-                stride=1,
-                padding=1,
-                groups=1,
-                act=None,
-                param_attr=ParamAttr(name=name + "_weights"),
-                bias_attr=False,
-                name=name + '_branch2b' + '.conv2d.output.1',
-                data_format=data_format)
-            short = self.shortcut(
-                input,
-                num_filters,
-                stride,
-                is_first,
-                name=name + "_branch1",
-                data_format=data_format)
-            name = name + "_branch2b"
-            bn_name = "bn" + name[3:]
-            short = fluid.contrib.layers.fused_bn_add_act(
-                conv1,
-                short,
-                param_attr=ParamAttr(name=bn_name + '_scale'),
-                bias_attr=ParamAttr(bn_name + '_offset'),
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance')
-        return short
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b",
+            data_format=data_format)
+        short = self.shortcut(
+            input, num_filters, stride, is_first, name=name + "_branch1", data_format=data_format)
+        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')

 def ResNet18():
......
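Two details of the removed code are worth noting. First, the fused branches called `fluid.layers.conv2d` directly instead of the `conv_bn_layer` helper: batch normalization had to be deferred into `fused_bn_add_act`, and `bias_attr=False` because the BN offset plays the role of the bias. Second, `bn_name = "bn" + name[3:]` derives the batch-norm parameter prefix from the conv name by swapping the leading `res` for `bn`, matching the Caffe-style ResNet naming this repo appears to use. A quick illustration (the concrete name is illustrative):

name = "res2a" + "_branch2b"      # conv layer name, e.g. "res2a_branch2b"
bn_name = "bn" + name[3:]         # strip "res", prepend "bn"
assert bn_name == "bn2a_branch2b"
# Parameters then line up as res2a_branch2b_weights (conv) and
# bn2a_branch2b_scale / _offset / _mean / _variance (batch norm).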
@@ -147,7 +147,7 @@ def parse_args():
     add_arg('fuse_bn_act_ops', bool, False, "Whether to use batch_norm and act fusion.")
     add_arg('fuse_bn_add_act_ops', bool, False, "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.")
     add_arg('enable_addto', bool, False, "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.")
     add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing")
     add_arg('label_smoothing_epsilon', float, 0.1, "The value of label_smoothing_epsilon parameter")
     #NOTE: (2019/08/08) temporary disable use_distill
......
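Note that the `fuse_bn_add_act_ops` argument itself survives the revert; only the hand-written fusion inside the model code is gone. After this commit the fusion presumably comes from the framework's graph pass instead, wired through the build strategy. A hedged sketch of that wiring; `args`, `train_prog`, and `loss` are assumed from the surrounding training code, not shown in this diff:

import paddle.fluid as fluid

build_strategy = fluid.BuildStrategy()
# Let the IR pass find batch_norm + elementwise_add + act patterns and fuse
# them, instead of constructing fused_bn_add_act ops in the model definition.
build_strategy.fuse_bn_add_act_ops = args.fuse_bn_add_act_ops
compiled = fluid.CompiledProgram(train_prog).with_data_parallel(
    loss_name=loss.name, build_strategy=build_strategy)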