diff --git a/tools/ema.py b/tools/ema.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbd5af2ed4af566406a562a0759a2d845f86f6b8
--- /dev/null
+++ b/tools/ema.py
@@ -0,0 +1,165 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.wrapped_decorator import signature_safe_contextmanager
+from paddle.fluid.framework import Program, program_guard, name_scope, default_main_program
+from paddle.fluid import unique_name, layers
+
+
+class ExponentialMovingAverage(object):
+    def __init__(self,
+                 decay=0.999,
+                 thres_steps=None,
+                 zero_debias=False,
+                 name=None):
+        self._decay = decay
+        self._thres_steps = thres_steps
+        self._name = name if name is not None else ''
+        self._decay_var = self._get_ema_decay()
+
+        self._params_tmps = []
+        for param in default_main_program().global_block().all_parameters():
+            if param.do_model_average != False:
+                tmp = param.block.create_var(
+                    name=unique_name.generate(".".join(
+                        [self._name + param.name, 'ema_tmp'])),
+                    dtype=param.dtype,
+                    persistable=False,
+                    stop_gradient=True)
+                self._params_tmps.append((param, tmp))
+
+        self._ema_vars = {}
+        for param, tmp in self._params_tmps:
+            with param.block.program._optimized_guard(
+                [param, tmp]), name_scope('moving_average'):
+                self._ema_vars[param.name] = self._create_ema_vars(param)
+
+        self.apply_program = Program()
+        block = self.apply_program.global_block()
+        with program_guard(main_program=self.apply_program):
+            decay_pow = self._get_decay_pow(block)
+            for param, tmp in self._params_tmps:
+                param = block._clone_variable(param)
+                tmp = block._clone_variable(tmp)
+                ema = block._clone_variable(self._ema_vars[param.name])
+                layers.assign(input=param, output=tmp)
+                # bias correction
+                if zero_debias:
+                    ema = ema / (1.0 - decay_pow)
+                layers.assign(input=ema, output=param)
+
+        self.restore_program = Program()
+        block = self.restore_program.global_block()
+        with program_guard(main_program=self.restore_program):
+            for param, tmp in self._params_tmps:
+                tmp = block._clone_variable(tmp)
+                param = block._clone_variable(param)
+                layers.assign(input=tmp, output=param)
+
+    def _get_ema_decay(self):
+        with default_main_program()._lr_schedule_guard():
+            decay_var = layers.tensor.create_global_var(
+                shape=[1],
+                value=self._decay,
+                dtype='float32',
+                persistable=True,
+                name="scheduled_ema_decay_rate")
+
+            if self._thres_steps is not None:
+                decay_t = (self._thres_steps + 1.0) / (self._thres_steps + 10.0)
+                with layers.control_flow.Switch() as switch:
+                    with switch.case(decay_t < self._decay):
+                        layers.tensor.assign(decay_t, decay_var)
+                    with switch.default():
+                        layers.tensor.assign(
+                            np.array(
+                                [self._decay], dtype=np.float32),
+                            decay_var)
+        return decay_var
+
+    def _get_decay_pow(self, block):
+        global_steps = layers.learning_rate_scheduler._decay_step_counter()
+        decay_var = block._clone_variable(self._decay_var)
+        decay_pow_acc = layers.elementwise_pow(decay_var, global_steps + 1)
+        return decay_pow_acc
+
+    def _create_ema_vars(self, param):
+        param_ema = layers.create_global_var(
+            name=unique_name.generate(self._name + param.name + '_ema'),
+            shape=param.shape,
+            value=0.0,
+            dtype=param.dtype,
+            persistable=True)
+
+        return param_ema
+
+    def update(self):
+        """
+        Update Exponential Moving Average. Should only call this method in
+        train program.
+        """
+        param_master_emas = []
+        for param, tmp in self._params_tmps:
+            with param.block.program._optimized_guard(
+                [param, tmp]), name_scope('moving_average'):
+                param_ema = self._ema_vars[param.name]
+                if param.name + '.master' in self._ema_vars:
+                    master_ema = self._ema_vars[param.name + '.master']
+                    param_master_emas.append([param_ema, master_ema])
+                else:
+                    ema_t = param_ema * self._decay_var + param * (
+                        1 - self._decay_var)
+                    layers.assign(input=ema_t, output=param_ema)
+
+        # for fp16 params
+        for param_ema, master_ema in param_master_emas:
+            default_main_program().global_block().append_op(
+                type="cast",
+                inputs={"X": master_ema},
+                outputs={"Out": param_ema},
+                attrs={
+                    "in_dtype": master_ema.dtype,
+                    "out_dtype": param_ema.dtype
+                })
+
+    @signature_safe_contextmanager
+    def apply(self, executor, need_restore=True):
+        """
+        Apply moving average to parameters for evaluation.
+        Args:
+            executor (Executor): The Executor to execute applying.
+            need_restore (bool): Whether to restore parameters after applying.
+        """
+        executor.run(self.apply_program)
+        try:
+            yield
+        finally:
+            if need_restore:
+                self.restore(executor)
+
+    def restore(self, executor):
+        """Restore parameters.
+        Args:
+            executor (Executor): The Executor to execute restoring.
+        """
+        executor.run(self.restore_program)
diff --git a/tools/ema_clean.py b/tools/ema_clean.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8ddc2e14566b9037908a78497f71f98c8606787
--- /dev/null
+++ b/tools/ema_clean.py
@@ -0,0 +1,42 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import argparse
+import functools
+import shutil
+import sys
+
+def main():
+    cleaned_model_dir = sys.argv[1]
+    ema_model_dir = sys.argv[2]
+    if not os.path.exists(cleaned_model_dir):
+        os.makedirs(cleaned_model_dir)
+
+    items = os.listdir(ema_model_dir)
+    for item in items:
+        if item.find('ema') > -1:
+            item_clean = item.replace('_ema_0', '')
+            shutil.copyfile(os.path.join(ema_model_dir, item),
+                            os.path.join(cleaned_model_dir, item_clean))
+        elif item.find('mean') > -1 or item.find('variance') > -1:
+            shutil.copyfile(os.path.join(ema_model_dir, item),
+                            os.path.join(cleaned_model_dir, item))
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/program.py b/tools/program.py
index 50c609de17186b234f8ce3632a94cf7f59454dcd..f36c8c5c705d9dabb619947d2eb79b3d6f96188e 100644
--- a/tools/program.py
+++ b/tools/program.py
@@ -86,7 +86,7 @@ def create_dataloader(feeds):
     return dataloader
 
 
-def create_model(architecture, image, classes_num):
+def create_model(architecture, image, classes_num, is_train):
     """
     Create a model
 
@@ -101,6 +101,8 @@ def create_model(architecture, image, classes_num):
     """
     name = architecture["name"]
    params = architecture.get("params", {})
+    params['is_test'] = not is_train
+    print(params)
     model = architectures.__dict__[name](**params)
     out = model.net(input=image, class_dim=classes_num)
     return out
@@ -323,7 +325,7 @@ def build(config, main_prog, startup_prog, is_train=True):
             feeds = create_feeds(config.image_shape, use_mix=use_mix)
             dataloader = create_dataloader(feeds.values())
             out = create_model(config.ARCHITECTURE, feeds['image'],
-                               config.classes_num)
+                               config.classes_num, is_train)
             fetchs = create_fetchs(
                 out,
                 feeds,
@@ -339,6 +341,12 @@ def build(config, main_prog, startup_prog, is_train=True):
                fetchs['lr'] = (lr, AverageMeter('lr', 'f', need_avg=False))
 
                optimizer = dist_optimizer(config, optimizer)
                optimizer.minimize(fetchs['loss'][0])
+                if config.get('use_ema'):
+
+                    global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
+                    ema = ExponentialMovingAverage(config.get('ema_decay'), thres_steps=global_steps)
+                    ema.update()
+                    fetchs['ema'] = ema
 
     return dataloader, fetchs
diff --git a/tools/train.py b/tools/train.py
index ab7752fd17c6621c605365d5529bf20d91837c89..b188646a3fce909fa7fc9c1cfe96f36fcfd47050 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -98,6 +98,14 @@ def main(args):
         if int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0:
             # 2. validate with validate dataset
             if config.validate and epoch_id % config.valid_interval == 0:
+                if config.get('use_ema'):
+                    logger.info(logger.coloring("EMA validate start..."))
+                    with train_fetchs['ema'].apply(exe):
+                        top1_acc = program.run(valid_dataloader, exe,
+                                               compiled_valid_prog, valid_fetchs,
+                                               epoch_id, 'valid')
+                    logger.info(logger.coloring("EMA validate over!"))
+
                 top1_acc = program.run(valid_dataloader, exe,
                                        compiled_valid_prog, valid_fetchs,
                                        epoch_id, 'valid')
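
A note on wiring, for readers applying the same change elsewhere: EMA is switched on purely through the training config (config.get('use_ema') and config.get('ema_decay') in tools/program.py), and because thres_steps is passed, the effective decay becomes min(ema_decay, (step + 1) / (step + 10)), which ramps the averaging up over the first training steps. The sketch below shows the intended call sequence on a toy fluid program. It is a minimal sketch, not part of the patch: it assumes Paddle 1.x on a single card, assumes ExponentialMovingAverage can be imported from tools/ema.py (the corresponding import in tools/program.py is not visible in this excerpt), and the toy network, the feed data, and the 0.999 decay are illustrative only.

import numpy as np
import paddle.fluid as fluid

from ema import ExponentialMovingAverage  # tools/ema.py, added above

# Toy network: a single fc layer standing in for the real classifier.
x = fluid.data(name='x', shape=[None, 4], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.reduce_mean(fluid.layers.square_error_cost(pred, y))

# Clone an inference program before minimize(), as usual in fluid.
test_program = fluid.default_main_program().clone(for_test=True)

optimizer = fluid.optimizer.SGD(learning_rate=0.01)
optimizer.minimize(loss)

# Same call sequence as tools/program.py: build the EMA after minimize(),
# once every trainable parameter exists, then append its update ops.
global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
ema = ExponentialMovingAverage(0.999, thres_steps=global_steps)
ema.update()

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

feed = {'x': np.random.rand(8, 4).astype('float32'),
        'y': np.random.rand(8, 1).astype('float32')}
for _ in range(5):
    exe.run(fluid.default_main_program(), feed=feed, fetch_list=[loss])

# Evaluation: apply() swaps the averaged weights in and restores the raw
# weights when the context exits, mirroring the use in tools/train.py.
with ema.apply(exe):
    exe.run(test_program, feed=feed, fetch_list=[pred])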
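
Exporting the averaged weights: each parameter's EMA copy is persisted under a name ending in _ema_0 (via unique_name.generate(param.name + '_ema')), so a saved set of persistable variables cannot be loaded directly as plain parameters. tools/ema_clean.py handles this with two positional arguments, the destination directory first and the directory holding the EMA weights second, for example (both paths are illustrative):

    python tools/ema_clean.py ./cleaned_model ./ema_model

Every file whose name contains 'ema' is copied with the '_ema_0' suffix stripped, batch-norm 'mean' / 'variance' files are copied unchanged, and all other files are skipped, so the cleaned directory can be loaded as an ordinary set of parameters.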