Commit 9708b21f authored by W wanghaoshuang

Refine model average options

1. Add attr 'average' to ParamAttr.
2. Make 'params_grads' optional for ModelAverage.
3. Add options 'average_mean' and 'average_variance' to batch_norm.
Parent d1a7b47e
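Taken together, these changes let individual parameters opt out of model averaging. Below is a minimal sketch of the first two changes (the batch_norm flags from change 3 are illustrated after the corresponding hunk further down); the keyword name 'average' and the optional 'params_grads' are taken from this diff and may differ in other releases, and the small network is purely illustrative.

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')

    # 1. ParamAttr carries an 'average' flag; False keeps this weight out of
    #    the averaged model.
    y_pred = fluid.layers.fc(input=x,
                             size=1,
                             param_attr=fluid.ParamAttr(name='fc.w',
                                                        average=False))

    cost = fluid.layers.square_error_cost(input=y_pred, label=y)
    avg_cost = fluid.layers.mean(cost)

    fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)

    # 2. 'params_grads' is now optional: ModelAverage collects the parameters
    #    of the default main program itself, honoring each parameter's flag.
    model_average = fluid.optimizer.ModelAverage(average_window_rate=0.15,
                                                 min_average_window=10000,
                                                 max_average_window=20000)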
......@@ -1137,6 +1137,8 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
self.average = kwargs.get('average', True)
def __str__(self):
return self.to_string(True)
......@@ -1157,7 +1159,7 @@ class Parameter(Variable):
if with_details:
res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer",
"gradient_clip_attr")
"gradient_clip_attr", "average")
for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name)))
......
......@@ -1486,7 +1486,9 @@ def batch_norm(input,
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None):
moving_variance_name=None,
average_mean=True,
average_variance=True):
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
......@@ -1517,7 +1519,10 @@ def batch_norm(input,
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False),
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
average=average_mean),
shape=param_shape,
dtype=input.dtype)
mean.stop_gradient = True
......@@ -1526,7 +1531,8 @@ def batch_norm(input,
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False),
trainable=False,
average=average_variance),
shape=param_shape,
dtype=input.dtype)
variance.stop_gradient = True
......
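The two new flags are forwarded into the ParamAttr of the moving mean and moving variance parameters, so a caller can keep a layer's running statistics out of the averaged model. A hedged sketch of the call site, using the keyword names added in the hunk above:

    import paddle.fluid as fluid

    img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
    conv = fluid.layers.conv2d(input=img, num_filters=16, filter_size=3)

    # Keep the moving statistics of this layer exactly as computed during
    # training; ModelAverage will skip the two parameters created here.
    bn = fluid.layers.batch_norm(input=conv,
                                 average_mean=False,
                                 average_variance=False)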
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from collections import defaultdict
from paddle.fluid.framework import Program
import framework
......@@ -818,8 +818,8 @@ class ModelAverage(Optimizer):
min_average_window, max_average_window and current update times.
Args:
params_grads: A list of parameter-grad variable pairs.
average_window_rate: The rate of average window.
params_grads: A list of parameter-grad variable pairs.
min_average_window: The minimum size of average window.
max_average_window: The maximum size of average window.
......@@ -840,8 +840,8 @@ class ModelAverage(Optimizer):
"""
def __init__(self,
params_grads,
average_window_rate,
average_window_rate=0.15,
params_grads=None,
min_average_window=10000,
max_average_window=10000,
**kwargs):
......@@ -849,25 +849,21 @@ class ModelAverage(Optimizer):
self.average_window = average_window_rate
self.min_average_window = min_average_window
self.max_average_window = max_average_window
self.params_grads = params_grads
# append 'moving mean' and 'moving variance' to self.params_grads
pattern = re.compile(r"batch_norm_\d+\.w_[1,2]")
self.params_grads = [] if params_grads is None else params_grads
params = {}
for param, grad in self.params_grads:
params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
if pattern.match(param.name) is not None:
self.params_grads.append((param, None))
# create a tmp gradient variable to backup parameter value
# for parameter whose grad is None
for i, param_grad in enumerate(self.params_grads):
param, grad = param_grad
if grad is None:
if param.name not in params and param.average:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=stop_gradient)
self.params_grads[i] = (param, grad)
stop_gradient=True)
params[param.name] = (param, grad)
self.params_grads = params.values()
for param, grad in self.params_grads:
self._append_average_accumulate_op(param)
......
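With 'params_grads' now optional, the constructor deduplicates parameters by name, skips those whose 'average' flag is False, and backs up gradient-less parameters (such as the moving statistics above) through a temporary variable. A usage sketch, assuming apply() behaves as a context manager that swaps in the averaged values and restores the originals afterwards:

    import paddle.fluid as fluid

    # Build the network and call minimize() on a regular optimizer first,
    # then construct ModelAverage without passing params_grads.
    model_average = fluid.optimizer.ModelAverage(average_window_rate=0.15,
                                                 min_average_window=10000,
                                                 max_average_window=20000)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # ... run the training loop on the default main program here ...

    # For evaluation, temporarily replace each eligible parameter with its
    # accumulated average; parameters with average=False stay untouched.
    with model_average.apply(exe):
        pass  # run the inference/test program here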
......@@ -28,13 +28,15 @@ class ParamAttr(object):
learning_rate=1.0,
regularizer=None,
trainable=True,
gradient_clip=None):
gradient_clip=None,
average=True):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.gradient_clip = gradient_clip
self.average = average
def set_default_initializer(self, initializer):
if initializer is None:
......@@ -80,7 +82,8 @@ class ParamAttr(object):
},
'regularizer': self.regularizer,
'trainable': self.trainable,
'gradient_clip_attr': self.gradient_clip
'gradient_clip_attr': self.gradient_clip,
'average': self.average
}
if with_initializer:
kwargs['initializer'] = self.initializer
......
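The flag travels from ParamAttr.to_kwargs() into the kwargs that framework.Parameter reads with kwargs.get('average', True) in the first hunk above, which is how ModelAverage can later test param.average. A small sketch of that hand-off, again using the names from this diff:

    import paddle.fluid as fluid

    attr = fluid.ParamAttr(name='fc.w_0', average=False)

    # to_kwargs() now carries the flag alongside trainable, regularizer, etc.;
    # the layer helper forwards these kwargs when creating the Parameter.
    kwargs = attr.to_kwargs()
    print(kwargs['average'])  # False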