From a7a825b4cdf1eaca84cedaec0af58c7f6397f7e2 Mon Sep 17 00:00:00 2001
From: qingqing01
Date: Wed, 23 Oct 2019 21:46:30 +0800
Subject: [PATCH] Update parameter loading (#3735)

---
 README.md                  |  2 +-
 docs/GETTING_STARTED_cn.md |  4 ++++
 ppdet/utils/checkpoint.py  | 10 +++++++---
 tools/train.py             |  8 ++++----
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 654363534..f5a2ce931 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ Advanced Features:
 ## Get Started
 
 - [Installation guide](docs/INSTALL.md)
-- [Quick Start on small dataset](docs/QUICK_STARTED.md)
+- [Quick start on small dataset](docs/QUICK_STARTED.md)
 - [Guide to training, evaluation and arguments description](docs/GETTING_STARTED.md)
 - [Guide to preprocess pipeline and custom dataset](docs/DATA.md)
 - [Introduction to the configuration workflow](docs/CONFIG.md)
diff --git a/docs/GETTING_STARTED_cn.md b/docs/GETTING_STARTED_cn.md
index 9180126f3..a32bfa7a8 100644
--- a/docs/GETTING_STARTED_cn.md
+++ b/docs/GETTING_STARTED_cn.md
@@ -180,3 +180,7 @@
 batch size can reach 4 per GPU (Tesla V100 16GB).
 **Q:** How do I modify the data preprocessing?<br>
 **A:** Set `sample_transform` in the config file. Note that the **complete preprocessing pipeline** must be listed in the config, e.g. `DecodeImage`, `NormalizeImage` and `Permute` for RCNN models. See the [config example](config_example) for more details.
+
+
+**Q:** What is the relationship between affine_channel and batch norm?
+**A:** When an RCNN-series model is initialized from a pretrained model, the batch norm parameters are sometimes frozen: the global mean and variance from the pretrained model are used, and the batch norm scale and bias are not updated. Most of the released ResNet-based RCNN models are trained this way. In this case you can set norm_type in the config to either bn or affine_channel, with freeze_norm set to true (the default); the two are equivalent. affine_channel computes `scale * x + bias`; when affine_channel is set, the batch norm parameters are automatically fused into scale and bias internally. If a model trained with affine_channel is used as initialization for another task, either affine_channel or batch norm can be used, and the parameters load correctly in both cases.
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index f5d7c5f56..54c364812 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -177,7 +177,8 @@ def load_and_fusebn(exe, prog, path):
         prog (fluid.Program): the Program to load weights into.
         path (string): the path where the model is saved.
     """
-    logger.info('Load model and fuse batch norm from {}...'.format(path))
+    logger.info('Load model and fuse batch norm (if present) from {}...'.format(
+        path))
 
     if is_url(path):
         path = _get_weight_path(path)
@@ -253,8 +254,11 @@
                 [scale_name, bias_name, mean_name, variance_name])
 
     if not bn_in_path:
-        raise ValueError("There is no params of batch norm in model {}.".format(
-            path))
+        fluid.io.load_vars(exe, path, prog, vars=all_vars)
+        logger.warning(
+            "There are no batch norm parameters in model {}. "
+            "Skip fusing batch norm; parameters loaded.".format(path))
+        return
 
     # load running mean and running variance on cpu place into global scope.
     place = fluid.CPUPlace()
diff --git a/tools/train.py b/tools/train.py
index 4f581d912..08e1fc634 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -77,9 +77,6 @@ def main():
     if 'log_iter' not in cfg:
         cfg.log_iter = 20
 
-    ignore_params = cfg.finetune_exclude_pretrained_params \
-        if 'finetune_exclude_pretrained_params' in cfg else []
-
     # check if set use_gpu=True in paddlepaddle cpu version
     check_gpu(cfg.use_gpu)
    if not FLAGS.dist or trainer_id == 0:
@@ -193,8 +190,11 @@
         compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
 
     fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
-    start_iter = 0
 
+    ignore_params = cfg.finetune_exclude_pretrained_params \
+        if 'finetune_exclude_pretrained_params' in cfg else []
+
+    start_iter = 0
     if FLAGS.resume_checkpoint:
         checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
         start_iter = checkpoint.global_step()
--
GitLab
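The affine_channel/batch norm equivalence described in the GETTING_STARTED_cn.md FAQ entry above, which is also what `load_and_fusebn` performs, amounts to folding frozen batch norm statistics into a per-channel `scale * x + bias`. Below is a minimal NumPy sketch of that fold; it is an illustration, not the PaddleDetection implementation: the helper name `fuse_bn_to_affine_channel`, the sample values, and the epsilon of 1e-5 are assumptions.

```python
import numpy as np

def fuse_bn_to_affine_channel(bn_scale, bn_bias, mean, variance, eps=1e-5):
    """Fold frozen batch norm statistics into affine_channel parameters.

    batch norm:      y = bn_scale * (x - mean) / sqrt(variance + eps) + bn_bias
    affine_channel:  y = scale * x + bias
    """
    std = np.sqrt(variance + eps)
    scale = bn_scale / std            # per-channel multiplier
    bias = bn_bias - mean * scale     # per-channel offset
    return scale, bias

# Example with a hypothetical 3-channel layer: after fusion,
# scale * x + bias reproduces the frozen batch norm output exactly.
bn_scale = np.array([1.0, 0.5, 2.0])
bn_bias = np.array([0.1, 0.0, -0.2])
mean = np.array([0.3, -0.1, 0.7])
variance = np.array([0.9, 1.1, 0.25])

scale, bias = fuse_bn_to_affine_channel(bn_scale, bn_bias, mean, variance)
x = np.random.rand(3)
assert np.allclose(scale * x + bias,
                   bn_scale * (x - mean) / np.sqrt(variance + 1e-5) + bn_bias)
```

Because the fused `scale` and `bias` reproduce the frozen batch norm output exactly, a checkpoint saved with affine_channel can initialize a model using either norm_type, which is the loading behavior this patch relies on.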