diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 370a477932361e2f111ddb586c294c839aed9ddc..6120d66c124788e197ab4328bdd530ae30fc35f2 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1155,7 +1155,7 @@ class Parameter(Variable):
 
         self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
 
-        self.average = kwargs.get('average', True)
+        self.do_model_average = kwargs.get('do_model_average', None)
 
     def __str__(self):
         return self.to_string(True)
@@ -1177,7 +1177,7 @@ class Parameter(Variable):
         if with_details:
             res_str = Variable.to_string(self, throw_on_error, True)
             additional_attr = ("trainable", "optimize_attr", "regularizer",
-                               "gradient_clip_attr", "average")
+                               "gradient_clip_attr", "do_model_average")
             for attr_name in additional_attr:
                 res_str += "%s: %s\n" % (attr_name,
                                          str(getattr(self, attr_name)))
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e5ae10636dfacab1cb3c67dc3de0618b915cb2b1..37ce73827532d4fbaeffc5dc798b0149de371be1 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1489,8 +1489,7 @@ def batch_norm(input,
                name=None,
                moving_mean_name=None,
                moving_variance_name=None,
-               average_mean=True,
-               average_variance=True):
+               do_model_average_for_mean_and_var=False):
     """
     This function helps create an operator to implement the BatchNorm layer
     using the configurations from the input parameters.
@@ -1519,12 +1518,15 @@ def batch_norm(input,
     bias = helper.create_parameter(
         attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
 
+    if do_model_average_for_mean_and_var:
+        do_model_average_for_mean_and_var = None
+
     mean = helper.create_parameter(
         attr=ParamAttr(
             name=moving_mean_name,
             initializer=Constant(0.0),
             trainable=False,
-            average=average_variance),
+            do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
         dtype=input.dtype)
     mean.stop_gradient = True
@@ -1534,7 +1536,7 @@ def batch_norm(input,
             name=moving_variance_name,
             initializer=Constant(1.0),
             trainable=False,
-            average=average_mean),
+            do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
         dtype=input.dtype)
     variance.stop_gradient = True
@@ -3352,14 +3354,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
     Here are some examples to explain it.
 
     1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
-    is [6, 8], the reshape operator will transform x into a 2-D tensor with 
+    is [6, 8], the reshape operator will transform x into a 2-D tensor with
     shape [6, 8] and leaving x's data unchanged.
 
     2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
     specified is [2, 3, -1, 2], the reshape operator will transform x into a
     4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this
-    case, one dimension of the target shape is set to -1, the value of this 
-    dimension is inferred from the total element number of x and remaining 
+    case, one dimension of the target shape is set to -1, the value of this
+    dimension is inferred from the total element number of x and remaining
     dimensions.
 
     3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
@@ -3593,7 +3595,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 def pad(x, paddings, pad_value=0., name=None):
     """
     Pads a tensor with a constant value given by :attr:`pad_value`, and the
-    padded width is specified by :attr:`paddings`. 
+    padded width is specified by :attr:`paddings`.
 
     Specifically, the number of values padded before the contents of :attr:`x`
     in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
@@ -3621,7 +3623,7 @@ def pad(x, paddings, pad_value=0., name=None):
         x (Variable): The input tensor variable.
         paddings (list): A list of integers. Its elements specify the padded
                          width before and after for each dimension in turn.
-                         The length of :attr:paddings must be 
+                         The length of :attr:paddings must be
                          :math:`rank(x) \\times 2`.
         pad_value (float): The constant value used to pad.
         name(str|None): A name for this layer(optional). If set None, the layer
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 560257a35618c4ba37b19d711d5c04f450e5a4fe..1917b7d044c565d351f0a9de72437571064221b2 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -840,7 +840,7 @@ class ModelAverage(Optimizer):
     """
 
     def __init__(self,
-                 average_window_rate=0.15,
+                 average_window_rate,
                  params_grads=None,
                  min_average_window=10000,
                  max_average_window=10000,
@@ -856,7 +856,7 @@ class ModelAverage(Optimizer):
                 params[param.name] = (param, grad)
         for param in framework.default_main_program().global_block(
         ).all_parameters():
-            if param.name not in params and param.average:
+            if param.name not in params and param.do_model_average != False:
                 grad = param.block.create_var(
                     name=unique_name.generate(".".join([param.name, 'tmp'])),
                     dtype=param.dtype,
diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py
index 74b968f8ee7ee6869bae99947e2a5bf037a4a2bd..1c6970441bccdc1c1221503256c30c83502bd123 100644
--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -29,14 +29,14 @@ class ParamAttr(object):
                  regularizer=None,
                  trainable=True,
                  gradient_clip=None,
-                 average=True):
+                 do_model_average=None):
         self.name = name
         self.initializer = initializer
         self.learning_rate = learning_rate
         self.regularizer = regularizer
         self.trainable = trainable
         self.gradient_clip = gradient_clip
-        self.average = average
+        self.do_model_average = do_model_average
 
     def set_default_initializer(self, initializer):
         if initializer is None:
@@ -83,7 +83,7 @@ class ParamAttr(object):
             'regularizer': self.regularizer,
             'trainable': self.trainable,
             'gradient_clip_attr': self.gradient_clip,
-            'average': self.average
+            'do_model_average': self.do_model_average
         }
         if with_initializer:
             kwargs['initializer'] = self.initializer
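
For reference, below is a minimal usage sketch of the renamed flags introduced by this change. It is not part of the patch: the layer sizes, parameter names, learning rate, and the 0.15 window rate are illustrative assumptions; only the `do_model_average`, `do_model_average_for_mean_and_var`, and required `average_window_rate` arguments come from the diff above.

```python
# Illustrative sketch only -- assumes the paddle.fluid API as modified above.
import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

# Per-parameter switch: exclude this weight from model averaging.
fc_attr = fluid.ParamAttr(name='fc_w', do_model_average=False)
hidden = fluid.layers.fc(input=image, size=128, act='relu', param_attr=fc_attr)

# Moving mean/variance are no longer averaged unless explicitly requested.
hidden = fluid.layers.batch_norm(
    input=hidden, do_model_average_for_mean_and_var=False)

prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.mean(
    x=fluid.layers.cross_entropy(input=prediction, label=label))

optimizer = fluid.optimizer.SGD(learning_rate=0.01)
optimizer.minimize(loss)

# average_window_rate is now a required positional argument (no 0.15 default).
model_average = fluid.optimizer.ModelAverage(
    0.15, min_average_window=10000, max_average_window=20000)
```

With this setup, any parameter whose `do_model_average` attribute is left at the default `None` is still picked up by `ModelAverage` (the check is `do_model_average != False`), so only parameters explicitly marked `False` are skipped.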