From faa8e30a14accb9431faa77e522f48a0a04a6b6b Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Sat, 12 Oct 2019 08:36:19 +0800 Subject: [PATCH] Set batch norm and data norm argument 'do_model_average_for_mean_and_var' default as True (#20421) * fix_norm_model_average_bug * test=develop * refine comment test=develop * refine comment test=develop --- paddle/fluid/API.spec | 8 ++++---- python/paddle/fluid/dygraph/nn.py | 5 +++-- python/paddle/fluid/layers/nn.py | 10 ++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index a2dc5d2c157..049b9ae2897 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -146,9 +146,9 @@ paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'po paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCDHW')), ('document', 'df8edcb8dd020fdddf778c9f613dc650')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', 'd873fdd73bcd74f9203d347cfb90de75')) paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', 'a94ed07bf4828e318aaaedb8b037579a')) -paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '1400433bae7876d0407ae205be39b7a1')) +paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, True, False, False)), ('document', 'dc68d19bdd820eb4f3f00fa460f53203')) paddle.fluid.layers.instance_norm (ArgSpec(args=['input', 'epsilon', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None)), ('document', '23d6fba8ad8495f67a66d8878be5b0be')) -paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', '5ba4cdb4ea5c03382da545335ffc05b7')) +paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, True)), ('document', '90e45b0336758c26a2031b4e275d650e')) paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'eafa177a7fed6178a51c1affa7f46a40')) paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCHW')), ('document', 'ed24c2d0f82cd9a3b40488157285a584')) paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCDHW')), ('document', 'efb1e3bc87339cb26faa2edae210e8b0')) @@ -655,8 +655,8 @@ paddle.fluid.dygraph.FC.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_su paddle.fluid.dygraph.FC.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.FC.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.FC.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.BatchNorm ('paddle.fluid.dygraph.nn.BatchNorm', ('document', 'f26599d75e3eba36c5dd3224a33009d8')) -paddle.fluid.dygraph.BatchNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'num_channels', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'dtype', 'data_layout', 'in_place', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats', 'trainable_statistics'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'float32', 'NCHW', False, None, None, False, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.BatchNorm ('paddle.fluid.dygraph.nn.BatchNorm', ('document', 'fb93cdf32f21a1c26e784d61a80051a7')) +paddle.fluid.dygraph.BatchNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'num_channels', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'dtype', 'data_layout', 'in_place', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats', 'trainable_statistics'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'float32', 'NCHW', False, None, None, True, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BatchNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) paddle.fluid.dygraph.BatchNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995')) paddle.fluid.dygraph.BatchNorm.backward (ArgSpec(args=['self'], varargs='inputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index eb76463292e..f65515aa35d 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -1198,7 +1198,8 @@ class BatchNorm(layers.Layer): in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False. moving_mean_name(str, optional): The name of moving_mean which store the global Mean. Default: None. moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. Default: None. - do_model_average_for_mean_and_var(bool, optional): Do model average for mean and variance or not. Default: False. + do_model_average_for_mean_and_var(bool, optional): Whether parameter mean and variance should do model + average when model average is enabled. Default: True. fuse_with_relu (bool, optional): When setting fuse_with_relu True, this OP performs relu after batch norm. Default: False. use_global_stats(bool, optional): Whether to use global mean and @@ -1241,7 +1242,7 @@ class BatchNorm(layers.Layer): in_place=False, moving_mean_name=None, moving_variance_name=None, - do_model_average_for_mean_and_var=False, + do_model_average_for_mean_and_var=True, fuse_with_relu=False, use_global_stats=False, trainable_statistics=False): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 001a186d7a7..f0c955bb562 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4080,7 +4080,7 @@ def batch_norm(input, name=None, moving_mean_name=None, moving_variance_name=None, - do_model_average_for_mean_and_var=False, + do_model_average_for_mean_and_var=True, fuse_with_relu=False, use_global_stats=False): """ @@ -4164,7 +4164,8 @@ def batch_norm(input, moving_variance_name(str, Default None): The name of the moving_variance which store the global Variance. If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm will save global variance with the string. - do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. + do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance should do model + average when model average is enabled. fuse_with_relu (bool): if True, this OP performs relu after batch norm. use_global_stats(bool, Default False): Whether to use global mean and variance. In inference or test mode, set use_global_stats to true @@ -4411,7 +4412,7 @@ def data_norm(input, name=None, moving_mean_name=None, moving_variance_name=None, - do_model_average_for_mean_and_var=False): + do_model_average_for_mean_and_var=True): """ **Data Normalization Layer** @@ -4445,7 +4446,8 @@ def data_norm(input, will be named automatically. moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. - do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. + do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance + should do model average when model average is enabled. Returns: Variable: A tensor variable which is the result after applying data normalization on the input. -- GitLab