未验证 提交 6125a588 作者: Yibing Liu 提交者: GitHub

Fix ema's example & fp16 update (#18273) (#18275)

test=release/1.5
上级 575bc572
...@@ -2459,12 +2459,16 @@ class ExponentialMovingAverage(object): ...@@ -2459,12 +2459,16 @@ class ExponentialMovingAverage(object):
.. code-block:: python .. code-block:: python
import numpy
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
data = fluid.layers.data(name='x', shape=[5], dtype='float32') data = fluid.layers.data(name='x', shape=[5], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10) hidden = fluid.layers.fc(input=data, size=10)
cost = fluid.layers.mean(hidden) cost = fluid.layers.mean(hidden)
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(cost) optimizer.minimize(cost)
...@@ -2472,21 +2476,31 @@ class ExponentialMovingAverage(object): ...@@ -2472,21 +2476,31 @@ class ExponentialMovingAverage(object):
ema = fluid.optimizer.ExponentialMovingAverage(0.999, thres_steps=global_steps) ema = fluid.optimizer.ExponentialMovingAverage(0.999, thres_steps=global_steps)
ema.update() ema.update()
# pseudo code place = fluid.CPUPlace()
for pass_id in range(args.pass_num): exe = fluid.Executor(place)
for data in train_reader(): exe.run(fluid.default_startup_program())
exe.run(fluid.default_main_program()...)
for pass_id in range(3):
for batch_id in range(6):
data = numpy.random.random(size=(10, 5)).astype('float32')
exe.run(program=fluid.default_main_program(),
feed={'x': data},
fetch_list=[cost.name])
# usage 1 # usage 1
with ema.apply(exe): with ema.apply(exe):
for data in test_reader(): data = numpy.random.random(size=(10, 5)).astype('float32')
exe.run(inference_program...) exe.run(program=test_program,
feed={'x': data},
fetch_list=[hidden.name])
# usage 2 # usage 2
with ema.apply(exe, need_restore=False): with ema.apply(exe, need_restore=False):
for data in test_reader(): data = numpy.random.random(size=(10, 5)).astype('float32')
exe.run(inference_program...) exe.run(program=test_program,
... feed={'x': data},
fetch_list=[hidden.name])
ema.restore(exe) ema.restore(exe)
""" """
...@@ -2576,14 +2590,30 @@ class ExponentialMovingAverage(object): ...@@ -2576,14 +2590,30 @@ class ExponentialMovingAverage(object):
Update Exponential Moving Average. Should only call this method in Update Exponential Moving Average. Should only call this method in
train program. train program.
""" """
param_master_emas = []
for param, tmp in self._params_tmps: for param, tmp in self._params_tmps:
with param.block.program._optimized_guard( with param.block.program._optimized_guard(
[param, tmp]), name_scope('moving_average'): [param, tmp]), name_scope('moving_average'):
param_ema = self._ema_vars[param.name] param_ema = self._ema_vars[param.name]
ema_t = param_ema * self._decay_var + param * (1 - if self._ema_vars.has_key(param.name + '.master'):
self._decay_var) master_ema = self._ema_vars[param.name + '.master']
param_master_emas.append([param_ema, master_ema])
else:
ema_t = param_ema * self._decay_var + param * (
1 - self._decay_var)
layers.assign(input=ema_t, output=param_ema) layers.assign(input=ema_t, output=param_ema)
# for fp16 params
for param_ema, master_ema in param_master_emas:
default_main_program().global_block().append_op(
type="cast",
inputs={"X": master_ema},
outputs={"Out": param_ema},
attrs={
"in_dtype": master_ema.dtype,
"out_dtype": param_ema.dtype
})
@signature_safe_contextmanager @signature_safe_contextmanager
def apply(self, executor, need_restore=True): def apply(self, executor, need_restore=True):
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册