diff --git a/PaddleCV/image_classification/build_model.py b/PaddleCV/image_classification/build_model.py index 59d4a20c5bd7e14404c74ac50d02235f8806c87b..3f7a3a80753fc3bd4a51b78ff995112d5d45eafe 100644 --- a/PaddleCV/image_classification/build_model.py +++ b/PaddleCV/image_classification/build_model.py @@ -39,14 +39,9 @@ def _basic_model(data, model, args, is_train): image_in = fluid.layers.transpose( image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image image_in.stop_gradient = image.stop_gradient - # fuse_bn_add_act only supports amp training - fuse_bn_add_act=False - if is_train and args.fuse_bn_add_act_ops: - fuse_bn_add_act=True net_out = model.net(input=image_in, class_dim=args.class_dim, - data_format=args.data_format, - fuse_bn_add_act=fuse_bn_add_act) + data_format=args.data_format) else: net_out = model.net(input=image, class_dim=args.class_dim) softmax_out = fluid.layers.softmax(net_out, use_cudnn=False) diff --git a/PaddleCV/image_classification/models/resnet.py b/PaddleCV/image_classification/models/resnet.py index ff730a48841c5bc9f5adf35d1fd6dabefa6ed71f..fcf453588ff13e8c53d185940cfc2b060ec4e1ac 100644 --- a/PaddleCV/image_classification/models/resnet.py +++ b/PaddleCV/image_classification/models/resnet.py @@ -31,7 +31,7 @@ class ResNet(): def __init__(self, layers=50): self.layers = layers - def net(self, input, class_dim=1000, data_format="NCHW", fuse_bn_add_act=False): + def net(self, input, class_dim=1000, data_format="NCHW"): layers = self.layers supported_layers = [18, 34, 50, 101, 152] assert layers in supported_layers, \ @@ -77,8 +77,7 @@ class ResNet(): num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1, name=conv_name, - data_format=data_format, - fuse_bn_add_act=fuse_bn_add_act) + data_format=data_format) pool = fluid.layers.pool2d( input=conv, pool_type='avg', global_pooling=True, data_format=data_format) @@ -98,8 +97,7 @@ class ResNet(): stride=2 if i == 0 and block != 0 else 1, is_first=block == i == 0, name=conv_name, - data_format=data_format, - fuse_bn_add_act=fuse_bn_add_act) + data_format=data_format) pool = fluid.layers.pool2d( input=conv, pool_type='avg', global_pooling=True, data_format=data_format) @@ -157,7 +155,7 @@ class ResNet(): else: return input - def bottleneck_block(self, input, num_filters, stride, name, data_format, fuse_bn_add_act): + def bottleneck_block(self, input, num_filters, stride, name, data_format): conv0 = self.conv_bn_layer( input=input, num_filters=num_filters, @@ -173,56 +171,26 @@ class ResNet(): act='relu', name=name + "_branch2b", data_format=data_format) - if not fuse_bn_add_act: - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c", - data_format=data_format) - short = self.shortcut( - input, - num_filters * 4, - stride, - is_first=False, - name=name + "_branch1", - data_format=data_format) + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c", + data_format=data_format) - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") - else: - conv2 = fluid.layers.conv2d( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - param_attr=ParamAttr(name=name + "_branch2c" + "_weights"), - bias_attr=False, - name=name + '_branch2c' + '.conv2d.output.1', - data_format=data_format) - short = self.shortcut( - input, - num_filters * 4, - stride, - is_first=False, - name=name + "_branch1", - data_format=data_format) - name = name + "_branch2c" - bn_name = "bn" + name[3:] - short = fluid.contrib.layers.fused_bn_add_act( - conv2, - short, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - name=name + ".add.output.5") + short = self.shortcut( + input, + num_filters * 4, + stride, + is_first=False, + name=name + "_branch1", + data_format=data_format) - return short + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") - def basic_block(self, input, num_filters, stride, is_first, name, - data_format, fuse_bn_add_act): + def basic_block(self, input, num_filters, stride, is_first, name, data_format): conv0 = self.conv_bn_layer( input=input, num_filters=num_filters, @@ -231,54 +199,16 @@ class ResNet(): stride=stride, name=name + "_branch2a", data_format=data_format) - if not fuse_bn_add_act: - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b", - data_format=data_format) - short = self.shortcut( - input, - num_filters, - stride, - is_first, - name=name + "_branch1", - data_format=data_format) - - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - else: - conv1 = fluid.layers.conv2d( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=1, - padding=1, - groups=1, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '_branch2b' + '.conv2d.output.1', - data_format=data_format) - short = self.shortcut( - input, - num_filters, - stride, - is_first, - name=name + "_branch1", - data_format=data_format) - name = name + "_branch2b" - bn_name = "bn" + name[3:] - short = fluid.contrib.layers.fused_bn_add_act( - conv1, - short, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - return short + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b", + data_format=data_format) + short = self.shortcut( + input, num_filters, stride, is_first, name=name + "_branch1", data_format=data_format) + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') def ResNet18(): diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py index c744bc436f6eb6a10a47088f184686f5c65c26d3..bef22147472e74f519b5a9be6d7b310106dcd502 100644 --- a/PaddleCV/image_classification/utils/utility.py +++ b/PaddleCV/image_classification/utils/utility.py @@ -147,7 +147,7 @@ def parse_args(): add_arg('fuse_bn_act_ops', bool, False, "Whether to use batch_norm and act fusion.") add_arg('fuse_bn_add_act_ops', bool, False, "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.") add_arg('enable_addto', bool, False, "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.") - + add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing") add_arg('label_smoothing_epsilon', float, 0.1, "The value of label_smoothing_epsilon parameter") #NOTE: (2019/08/08) temporary disable use_distill