Commit 2e40660e authored by W wanghaoshuang

Fix some issues.

Parent 19db989e
@@ -1155,7 +1155,7 @@ class Parameter(Variable):
         self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
-        self.average = kwargs.get('average', True)
+        self.do_model_average = kwargs.get('do_model_average', None)
     def __str__(self):
         return self.to_string(True)
@@ -1177,7 +1177,7 @@ class Parameter(Variable):
         if with_details:
             res_str = Variable.to_string(self, throw_on_error, True)
             additional_attr = ("trainable", "optimize_attr", "regularizer",
-                               "gradient_clip_attr", "average")
+                               "gradient_clip_attr", "do_model_average")
             for attr_name in additional_attr:
                 res_str += "%s: %s\n" % (attr_name,
                                          str(getattr(self, attr_name)))
......
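The hunk above renames the Parameter attribute from `average` to `do_model_average` (now defaulting to None rather than True) and lists it in the detailed string representation. A minimal sketch of inspecting the renamed attribute, assuming a paddle.fluid build that already contains this change; the layer name and sizes are only illustrative:

```python
import paddle.fluid as fluid

# Build a tiny program so the global block owns at least one Parameter.
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
y = fluid.layers.fc(input=x, size=2)

for param in fluid.default_main_program().global_block().all_parameters():
    # After this commit the attribute is `do_model_average` (None by default)
    # instead of the old boolean `average`, and to_string() now reports it.
    print(param.name, param.do_model_average)
    print(param.to_string(throw_on_error=True, with_details=True))
```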
@@ -1489,8 +1489,7 @@ def batch_norm(input,
                name=None,
                moving_mean_name=None,
                moving_variance_name=None,
-               average_mean=True,
-               average_variance=True):
+               do_model_average_for_mean_and_var=False):
     """
     This function helps create an operator to implement
     the BatchNorm layer using the configurations from the input parameters.
@@ -1519,12 +1518,15 @@ def batch_norm(input,
     bias = helper.create_parameter(
         attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
+    if do_model_average_for_mean_and_var:
+        do_model_average_for_mean_and_var = None
     mean = helper.create_parameter(
         attr=ParamAttr(
             name=moving_mean_name,
             initializer=Constant(0.0),
             trainable=False,
-            average=average_variance),
+            do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
         dtype=input.dtype)
     mean.stop_gradient = True
@@ -1534,7 +1536,7 @@ def batch_norm(input,
             name=moving_variance_name,
             initializer=Constant(1.0),
             trainable=False,
-            average=average_mean),
+            do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
         dtype=input.dtype)
     variance.stop_gradient = True
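In the hunks above, the two flags `average_mean` and `average_variance` collapse into a single `do_model_average_for_mean_and_var` argument that defaults to False; a True value is remapped to None before it reaches ParamAttr, so the moving mean and variance then follow the default model-averaging behavior. A hedged call sketch, assuming the fluid layers API of that period; the network and argument values are illustrative only:

```python
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
conv = fluid.layers.conv2d(input=img, num_filters=16, filter_size=3)

# Single flag after this commit: it replaces the old average_mean /
# average_variance pair and defaults to False.
bn = fluid.layers.batch_norm(
    input=conv,
    act='relu',
    do_model_average_for_mean_and_var=False)
```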
@@ -3352,14 +3354,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
     Here are some examples to explain it.
     1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
     is [6, 8], the reshape operator will transform x into a 2-D tensor with
     shape [6, 8] and leaving x's data unchanged.
     2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
     specified is [2, 3, -1, 2], the reshape operator will transform x into a
     4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this
     case, one dimension of the target shape is set to -1, the value of this
     dimension is inferred from the total element number of x and remaining
     dimensions.
     3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
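The reshape hunk only touches docstring text, but the -1 inference it describes is easy to demonstrate. A small sketch, assuming the reshape signature shown in the hunk header; the tensor name is illustrative:

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 4, 6], dtype='float32',
                      append_batch_size=False)

# Example 2 from the docstring: one target dimension may be -1 and is
# inferred from the remaining dimensions, so shape [2, 3, -1, 2] becomes
# [2, 3, 4, 2] for a [2, 4, 6] input.
y = fluid.layers.reshape(x=x, shape=[2, 3, -1, 2])
```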
@@ -3593,7 +3595,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 def pad(x, paddings, pad_value=0., name=None):
     """
     Pads a tensor with a constant value given by :attr:`pad_value`, and the
     padded width is specified by :attr:`paddings`.
     Specifically, the number of values padded before the contents of :attr:`x`
     in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
@@ -3621,7 +3623,7 @@ def pad(x, paddings, pad_value=0., name=None):
         x (Variable): The input tensor variable.
         paddings (list): A list of integers. Its elements specify the padded
             width before and after for each dimension in turn.
             The length of :attr:paddings must be
             :math:`rank(x) \\times 2`.
         pad_value (float): The constant value used to pad.
         name(str|None): A name for this layer(optional). If set None, the layer
......
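The pad docstring above states that `paddings` must hold `rank(x) * 2` integers, one before/after pair per dimension. A short sketch of a call matching that description, assuming the signature shown in the hunk header; names and values are illustrative:

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32',
                      append_batch_size=False)

# paddings holds rank(x) * 2 entries: a (before, after) pair per dimension.
# The first dimension gets 0/1 extra rows, the second 1/2 extra columns,
# all filled with pad_value.
out = fluid.layers.pad(x=x, paddings=[0, 1, 1, 2], pad_value=0.)
```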
@@ -840,7 +840,7 @@ class ModelAverage(Optimizer):
     """
     def __init__(self,
-                 average_window_rate=0.15,
+                 average_window_rate,
                  params_grads=None,
                  min_average_window=10000,
                  max_average_window=10000,
@@ -856,7 +856,7 @@ class ModelAverage(Optimizer):
                 params[param.name] = (param, grad)
         for param in framework.default_main_program().global_block(
         ).all_parameters():
-            if param.name not in params and param.average:
+            if param.name not in params and param.do_model_average != False:
                 grad = param.block.create_var(
                     name=unique_name.generate(".".join([param.name, 'tmp'])),
                     dtype=param.dtype,
......
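In the ModelAverage hunks, `average_window_rate` loses its 0.15 default and becomes a required argument, and a parameter now takes part in averaging unless its `do_model_average` attribute is explicitly False (previously the old `param.average` flag had to be truthy). A hedged construction sketch; the window sizes are illustrative:

```python
import paddle.fluid as fluid

# average_window_rate must now be passed explicitly; it no longer
# defaults to 0.15. Parameters are averaged unless their
# do_model_average attribute is explicitly False.
model_average = fluid.optimizer.ModelAverage(
    average_window_rate=0.15,
    min_average_window=10000,
    max_average_window=20000)
```

At evaluation time the averaged weights are typically swapped in with `model_average.apply(...)`, which is not part of the hunk shown here.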
@@ -29,14 +29,14 @@ class ParamAttr(object):
                  regularizer=None,
                  trainable=True,
                  gradient_clip=None,
-                 average=True):
+                 do_model_average=None):
         self.name = name
         self.initializer = initializer
         self.learning_rate = learning_rate
         self.regularizer = regularizer
         self.trainable = trainable
         self.gradient_clip = gradient_clip
-        self.average = average
+        self.model_average = do_model_average
     def set_default_initializer(self, initializer):
         if initializer is None:
@@ -83,7 +83,7 @@ class ParamAttr(object):
             'regularizer': self.regularizer,
             'trainable': self.trainable,
             'gradient_clip_attr': self.gradient_clip,
-            'average': self.average
+            'model_average': self.model_average
         }
         if with_initializer:
             kwargs['initializer'] = self.initializer
......
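The ParamAttr hunk turns `average=True` into `do_model_average=None`, stores the value as `self.model_average`, and forwards it under the `model_average` key when the attribute is converted to kwargs. A hedged sketch of opting one layer's weight out of model averaging; the parameter name and layer sizes are illustrative:

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[16], dtype='float32')

# Opt this weight out of model averaging; passing None (the new default)
# leaves the decision to the ModelAverage optimizer.
w_attr = fluid.ParamAttr(name='fc_w', do_model_average=False)
y = fluid.layers.fc(input=x, size=4, param_attr=w_attr)
```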