Unverified commit f21594cc authored by dyning, committed by GitHub

Merge pull request #69 from littletomatodonkey/add_mv_lr

fix mv3 arch and faq doc
@@ -13,3 +13,8 @@
>>
* Q: The `mix` parameter is set in the `TRAIN` field of the configuration file, so why doesn't the `mixup` data augmentation take effect during preprocessing?
* A: When using mixup, both the data preprocessing and the model input need to change, so you must also explicitly set `use_mix: True` in the configuration file for `mixup` to take effect.
>>
* Q: For evaluation and prediction, the folder containing the pretrained model has already been specified, yet the parameters still cannot be loaded. Why is that?
* A: When loading a pretrained model, you need to specify the pretrained model's prefix. For example, if the folder holding the pretrained parameters is `output/ResNet50_vd/19` and the parameter file is `output/ResNet50_vd/19/ppcls.pdparams`, then `pretrained_model` should be set to `output/ResNet50_vd/19/ppcls`; PaddleClas automatically appends the `.pdparams` suffix (see the sketch below).
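A minimal sketch of this prefix convention (illustrative only; the helper below is hypothetical, not the actual PaddleClas loader):

```python
import os


def resolve_pretrained_prefix(pretrained_model):
    # hypothetical helper: the prefix "output/ResNet50_vd/19/ppcls" is
    # completed to "output/ResNet50_vd/19/ppcls.pdparams" before loading
    params_path = pretrained_model + ".pdparams"
    if not os.path.exists(params_path):
        raise ValueError("No parameter file found at {}".format(params_path))
    return params_path
```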
@@ -32,7 +32,7 @@ MobileNetV3 is a new NAS-based lightweight network proposed by Google in 2019
| MobileNetV2_x2_0 | 0.752 | 0.926 | | | 2.320 | 11.130 |
| MobileNetV2_ssld | 0.7674 | 0.9339 | | | 0.600 | 3.440 |
| MobileNetV3_large_<br>x1_25 | 0.764 | 0.930 | 0.766 | | 0.714 | 7.440 |
| MobileNetV3_large_<br>x1_0 | 0.753 | 0.923 | 0.752 | | 0.450 | 5.470 |
| MobileNetV3_large_<br>x0_75 | 0.731 | 0.911 | 0.733 | | 0.296 | 3.910 |
| MobileNetV3_large_<br>x0_5 | 0.692 | 0.885 | 0.688 | | 0.138 | 2.670 |
| MobileNetV3_large_<br>x0_35 | 0.643 | 0.855 | 0.642 | | 0.077 | 2.100 |
...
@@ -68,6 +68,8 @@ python tools/eval.py \
```
You can change the `ARCHITECTURE.name` field and the `pretrained_model` field in configs/eval.yaml to configure the model to be evaluated, or update the configuration with the `-o` option.
**Note:** When loading a pretrained model, you need to specify the pretrained model's prefix. For example, if the folder holding the pretrained parameters is `output/ResNet50_vd/19` and the parameter file is `output/ResNet50_vd/19/ppcls.pdparams`, then `pretrained_model` should be set to `output/ResNet50_vd/19/ppcls`; PaddleClas automatically appends the `.pdparams` suffix.
## 3. Model Inference
PaddlePaddle provides three ways to run prediction and inference. The following describes how to run inference with the prediction engine:
...
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
@@ -30,9 +29,19 @@ __all__ = [
class MobileNetV3():
    def __init__(self,
                 scale=1.0,
                 model_name='small',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
        self.scale = scale
        self.inplanes = 16
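        # per-stage learning-rate multipliers; conv_bn_layer/se_block pick one
        # of these five entries based on how many blocks have been built so far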
        self.lr_mult_list = lr_mult_list
        assert len(self.lr_mult_list) == 5, \
            "lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format(
                len(self.lr_mult_list))
        self.curr_stage = 0
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
@@ -54,6 +63,7 @@ class MobileNetV3():
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
            self.lr_interval = 3
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
@@ -71,9 +81,10 @@ class MobileNetV3():
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
            self.lr_interval = 2
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))

    def net(self, input, class_dim=1000):
        scale = self.scale
@@ -81,7 +92,7 @@ class MobileNetV3():
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        # conv1
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
@@ -107,6 +118,7 @@ class MobileNetV3():
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1
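            # record how many blocks have been built; conv_bn_layer/se_block
            # use this counter to select the per-stage learning-rate multiplier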
            self.curr_stage = i

        conv = self.conv_bn_layer(
            input=conv,
@@ -149,6 +161,10 @@ class MobileNetV3():
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
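        # pick the learning-rate multiplier of the stage this block belongs to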
        lr_idx = self.curr_stage // self.lr_interval
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]

        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
@@ -158,7 +174,8 @@ class MobileNetV3():
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(
                name=name + '_weights', learning_rate=lr_mult),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
@@ -189,6 +206,10 @@ class MobileNetV3():
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
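        # same per-stage learning-rate lookup as in conv_bn_layer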
        lr_idx = self.curr_stage // self.lr_interval
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]

        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
@@ -197,15 +218,19 @@ class MobileNetV3():
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(
                name=name + '_1_weights', learning_rate=lr_mult),
            bias_attr=ParamAttr(
                name=name + '_1_offset', learning_rate=lr_mult))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(
                name=name + '_2_weights', learning_rate=lr_mult),
            bias_attr=ParamAttr(
                name=name + '_2_offset', learning_rate=lr_mult))
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale
@@ -275,8 +300,8 @@ def MobileNetV3_small_x0_75():
    return model


def MobileNetV3_small_x1_0(**args):
    model = MobileNetV3(model_name='small', scale=1.0, **args)
    return model
@@ -300,8 +325,8 @@ def MobileNetV3_large_x0_75():
    return model


def MobileNetV3_large_x1_0(**args):
    model = MobileNetV3(model_name='large', scale=1.0, **args)
    return model
...
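For reference, a minimal usage sketch of the new `lr_mult_list` argument (the import path is an assumption; only the constructor signature and the `net()` interface come from the code above). Smaller multipliers slow the earlier stages down relative to the later ones:

```python
import paddle.fluid as fluid

# the import path is assumed; point it at wherever mobilenet_v3.py lives
from mobilenet_v3 import MobileNetV3_large_x1_0

image = fluid.data(name="image", shape=[None, 3, 224, 224], dtype="float32")
# five multipliers, one per stage group; the constructor asserts len == 5
model = MobileNetV3_large_x1_0(lr_mult_list=[0.25, 0.25, 0.5, 0.5, 1.0])
logits = model.net(image, class_dim=1000)
```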