未验证 提交 6125a588 编写于 作者: Yibing Liu 提交者: GitHub

Fix ema's example & fp16 update (#18273) (#18275)

test=release/1.5
上级 575bc572
...@@ -2458,36 +2458,50 @@ class ExponentialMovingAverage(object): ...@@ -2458,36 +2458,50 @@ class ExponentialMovingAverage(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import numpy
import paddle
data = fluid.layers.data(name='x', shape=[5], dtype='float32') import paddle.fluid as fluid
hidden = fluid.layers.fc(input=data, size=10)
cost = fluid.layers.mean(hidden) data = fluid.layers.data(name='x', shape=[5], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
optimizer = fluid.optimizer.Adam(learning_rate=0.001) cost = fluid.layers.mean(hidden)
optimizer.minimize(cost)
test_program = fluid.default_main_program().clone(for_test=True)
global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
ema = fluid.optimizer.ExponentialMovingAverage(0.999, thres_steps=global_steps) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
ema.update() optimizer.minimize(cost)
# pseudo code global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
for pass_id in range(args.pass_num): ema = fluid.optimizer.ExponentialMovingAverage(0.999, thres_steps=global_steps)
for data in train_reader(): ema.update()
exe.run(fluid.default_main_program()...)
place = fluid.CPUPlace()
# usage 1 exe = fluid.Executor(place)
with ema.apply(exe): exe.run(fluid.default_startup_program())
for data in test_reader():
exe.run(inference_program...) for pass_id in range(3):
for batch_id in range(6):
# usage 2 data = numpy.random.random(size=(10, 5)).astype('float32')
with ema.apply(exe, need_restore=False): exe.run(program=fluid.default_main_program(),
for data in test_reader(): feed={'x': data},
exe.run(inference_program...) fetch_list=[cost.name])
...
ema.restore(exe) # usage 1
with ema.apply(exe):
data = numpy.random.random(size=(10, 5)).astype('float32')
exe.run(program=test_program,
feed={'x': data},
fetch_list=[hidden.name])
# usage 2
with ema.apply(exe, need_restore=False):
data = numpy.random.random(size=(10, 5)).astype('float32')
exe.run(program=test_program,
feed={'x': data},
fetch_list=[hidden.name])
ema.restore(exe)
""" """
def __init__(self, decay=0.999, thres_steps=None, name=None): def __init__(self, decay=0.999, thres_steps=None, name=None):
...@@ -2576,13 +2590,29 @@ class ExponentialMovingAverage(object): ...@@ -2576,13 +2590,29 @@ class ExponentialMovingAverage(object):
Update Exponential Moving Average. Should only call this method in Update Exponential Moving Average. Should only call this method in
train program. train program.
""" """
param_master_emas = []
for param, tmp in self._params_tmps: for param, tmp in self._params_tmps:
with param.block.program._optimized_guard( with param.block.program._optimized_guard(
[param, tmp]), name_scope('moving_average'): [param, tmp]), name_scope('moving_average'):
param_ema = self._ema_vars[param.name] param_ema = self._ema_vars[param.name]
ema_t = param_ema * self._decay_var + param * (1 - if self._ema_vars.has_key(param.name + '.master'):
self._decay_var) master_ema = self._ema_vars[param.name + '.master']
layers.assign(input=ema_t, output=param_ema) param_master_emas.append([param_ema, master_ema])
else:
ema_t = param_ema * self._decay_var + param * (
1 - self._decay_var)
layers.assign(input=ema_t, output=param_ema)
# for fp16 params
for param_ema, master_ema in param_master_emas:
default_main_program().global_block().append_op(
type="cast",
inputs={"X": master_ema},
outputs={"Out": param_ema},
attrs={
"in_dtype": master_ema.dtype,
"out_dtype": param_ema.dtype
})
@signature_safe_contextmanager @signature_safe_contextmanager
def apply(self, executor, need_restore=True): def apply(self, executor, need_restore=True):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册