From a7a825b4cdf1eaca84cedaec0af58c7f6397f7e2 Mon Sep 17 00:00:00 2001
From: qingqing01
Date: Wed, 23 Oct 2019 21:46:30 +0800
Subject: [PATCH] Update parameter loading (#3735)

---
 README.md                  |  2 +-
 docs/GETTING_STARTED_cn.md |  4 ++++
 ppdet/utils/checkpoint.py  | 10 +++++++---
 tools/train.py             |  8 ++++----
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 654363534..f5a2ce931 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ Advanced Features:
 ## Get Started
 
 - [Installation guide](docs/INSTALL.md)
-- [Quick Start on small dataset](docs/QUICK_STARTED.md)
+- [Quick start on small dataset](docs/QUICK_STARTED.md)
 - [Guide to training, evaluation and arguments description](docs/GETTING_STARTED.md)
 - [Guide to preprocess pipeline and custom dataset](docs/DATA.md)
 - [Introduction to the configuration workflow](docs/CONFIG.md)
diff --git a/docs/GETTING_STARTED_cn.md b/docs/GETTING_STARTED_cn.md
index 9180126f3..a32bfa7a8 100644
--- a/docs/GETTING_STARTED_cn.md
+++ b/docs/GETTING_STARTED_cn.md
@@ -180,3 +180,7 @@
 batch size can reach 4 per GPU (Tesla V100 16GB).
 **Q:** How do I modify the data preprocessing?<br>
 **A:** Set `sample_transform` in the config file. Note that the **complete preprocessing pipeline** must be listed in the config, e.g. `DecodeImage`, `NormalizeImage` and `Permute` for RCNN models. See the [config example](config_example) for more details.
+
+
+**Q:** What is the relationship between affine_channel and batch norm?
+**A:** When an RCNN-series model is initialized from a pretrained model, the batch norm parameters are sometimes frozen: the global mean and variance from the pretrained model are used, and the batch norm scale and bias are not updated. Most of the released ResNet-based RCNN models are trained this way. In this case you can set norm_type in the config to either bn or affine_channel, with freeze_norm set to true (the default); the two are equivalent. affine_channel computes `scale * x + bias`; when affine_channel is set, the batch norm parameters are automatically fused into scale and bias internally. If a model trained with affine_channel is used as initialization for another task, either affine_channel or batch norm can be used, and the parameters load correctly in both cases.
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index f5d7c5f56..54c364812 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -177,7 +177,8 @@ def load_and_fusebn(exe, prog, path):
         prog (fluid.Program): the Program to load weights into.
         path (string): the path where the model is saved.
     """
-    logger.info('Load model and fuse batch norm from {}...'.format(path))
+    logger.info('Load model and fuse batch norm (if present) from {}...'.format(
+        path))
 
     if is_url(path):
         path = _get_weight_path(path)
@@ -253,8 +254,11 @@
                 [scale_name, bias_name, mean_name, variance_name])
 
     if not bn_in_path:
-        raise ValueError("There is no params of batch norm in model {}.".format(
-            path))
+        fluid.io.load_vars(exe, path, prog, vars=all_vars)
+        logger.warning(
+            "There are no batch norm parameters in model {}. "
+            "Skip fusing batch norm; parameters loaded.".format(path))
+        return
 
     # load running mean and running variance on cpu place into global scope.
     place = fluid.CPUPlace()
diff --git a/tools/train.py b/tools/train.py
index 4f581d912..08e1fc634 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -77,9 +77,6 @@ def main():
     if 'log_iter' not in cfg:
         cfg.log_iter = 20
 
-    ignore_params = cfg.finetune_exclude_pretrained_params \
-        if 'finetune_exclude_pretrained_params' in cfg else []
-
     # check if set use_gpu=True in paddlepaddle cpu version
     check_gpu(cfg.use_gpu)
    if not FLAGS.dist or trainer_id == 0:
@@ -193,8 +190,11 @@
         compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
 
     fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
-    start_iter = 0
 
+    ignore_params = cfg.finetune_exclude_pretrained_params \
+        if 'finetune_exclude_pretrained_params' in cfg else []
+
+    start_iter = 0
     if FLAGS.resume_checkpoint:
         checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
         start_iter = checkpoint.global_step()
--
GitLab
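The affine_channel/batch norm equivalence described in the GETTING_STARTED_cn.md FAQ entry above, which is also what `load_and_fusebn` performs, amounts to folding frozen batch norm statistics into a per-channel `scale * x + bias`. Below is a minimal NumPy sketch of that fold; it is an illustration, not the PaddleDetection implementation: the helper name `fuse_bn_to_affine_channel`, the sample values, and the epsilon of 1e-5 are assumptions.

```python
import numpy as np

def fuse_bn_to_affine_channel(bn_scale, bn_bias, mean, variance, eps=1e-5):
    """Fold frozen batch norm statistics into affine_channel parameters.

    batch norm:      y = bn_scale * (x - mean) / sqrt(variance + eps) + bn_bias
    affine_channel:  y = scale * x + bias
    """
    std = np.sqrt(variance + eps)
    scale = bn_scale / std            # per-channel multiplier
    bias = bn_bias - mean * scale     # per-channel offset
    return scale, bias

# Example with a hypothetical 3-channel layer: after fusion,
# scale * x + bias reproduces the frozen batch norm output exactly.
bn_scale = np.array([1.0, 0.5, 2.0])
bn_bias = np.array([0.1, 0.0, -0.2])
mean = np.array([0.3, -0.1, 0.7])
variance = np.array([0.9, 1.1, 0.25])

scale, bias = fuse_bn_to_affine_channel(bn_scale, bn_bias, mean, variance)
x = np.random.rand(3)
assert np.allclose(scale * x + bias,
                   bn_scale * (x - mean) / np.sqrt(variance + 1e-5) + bn_bias)
```

Because the fused `scale` and `bias` reproduce the frozen batch norm output exactly, a checkpoint saved with affine_channel can initialize a model using either norm_type, which is the loading behavior this patch relies on.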