未验证 提交 f21594cc 编写于 作者: D dyning 提交者: GitHub

Merge pull request #69 from littletomatodonkey/add_mv_lr

fix mv3 arch and faq doc
...@@ -12,4 +12,9 @@ ...@@ -12,4 +12,9 @@
>> >>
* Q: 在配置文件的`TRAIN`字段中配置了`mix`的参数,为什么`mixup`的数据增广预处理没有生效呢? * Q: 在配置文件的`TRAIN`字段中配置了`mix`的参数,为什么`mixup`的数据增广预处理没有生效呢?
* A: 使用mixup时,数据预处理部分与模型输入部分均需要修改,因此还需要在配置文件中显式地配置`use_mix: True`,才能使得`mixup`生效。 * A: 使用mixup时,数据预处理部分与模型输入部分均需要修改,因此还需要在配置文件中显式地配置`use_mix: True`,才能使得`mixup`生效。
\ No newline at end of file
>>
* Q: 评估和预测时,已经指定了预训练模型所在文件夹的地址,但是仍然无法导入参数,这是为什么呢?
* A: 加载预训练模型时,需要指定预训练模型的前缀,例如预训练模型参数所在的文件夹为`output/ResNet50_vd/19`,预训练模型参数的名称为`output/ResNet50_vd/19/ppcls.pdparams`,则`pretrained_model`参数需要指定为`output/ResNet50_vd/19/ppcls`,PaddleClas会自动补齐`.pdparams`的后缀。
...@@ -32,7 +32,7 @@ MobileNetV3是Google于2019年提出的一种基于NAS的新的轻量级网络 ...@@ -32,7 +32,7 @@ MobileNetV3是Google于2019年提出的一种基于NAS的新的轻量级网络
| MobileNetV2_x2_0 | 0.752 | 0.926 | | | 2.320 | 11.130 | | MobileNetV2_x2_0 | 0.752 | 0.926 | | | 2.320 | 11.130 |
| MobileNetV2_ssld | 0.7674 | 0.9339 | | | 0.600 | 3.440 | | MobileNetV2_ssld | 0.7674 | 0.9339 | | | 0.600 | 3.440 |
| MobileNetV3_large_<br>x1_25 | 0.764 | 0.930 | 0.766 | | 0.714 | 7.440 | | MobileNetV3_large_<br>x1_25 | 0.764 | 0.930 | 0.766 | | 0.714 | 7.440 |
| MobileNetV3_large_<br>x1_0 | 0.753 | 0.753 | 0.752 | | 0.450 | 5.470 | | MobileNetV3_large_<br>x1_0 | 0.753 | 0.923 | 0.752 | | 0.450 | 5.470 |
| MobileNetV3_large_<br>x0_75 | 0.731 | 0.911 | 0.733 | | 0.296 | 3.910 | | MobileNetV3_large_<br>x0_75 | 0.731 | 0.911 | 0.733 | | 0.296 | 3.910 |
| MobileNetV3_large_<br>x0_5 | 0.692 | 0.885 | 0.688 | | 0.138 | 2.670 | | MobileNetV3_large_<br>x0_5 | 0.692 | 0.885 | 0.688 | | 0.138 | 2.670 |
| MobileNetV3_large_<br>x0_35 | 0.643 | 0.855 | 0.642 | | 0.077 | 2.100 | | MobileNetV3_large_<br>x0_35 | 0.643 | 0.855 | 0.642 | | 0.077 | 2.100 |
......
...@@ -68,6 +68,8 @@ python tools/eval.py \ ...@@ -68,6 +68,8 @@ python tools/eval.py \
``` ```
可以更改configs/eval.yaml中的`ARCHITECTURE.name`字段和pretrained_model字段来配置评估模型,也可以通过-o参数更新配置。 可以更改configs/eval.yaml中的`ARCHITECTURE.name`字段和pretrained_model字段来配置评估模型,也可以通过-o参数更新配置。
**注意:** 加载预训练模型时,需要指定预训练模型的前缀,例如预训练模型参数所在的文件夹为`output/ResNet50_vd/19`,预训练模型参数的名称为`output/ResNet50_vd/19/ppcls.pdparams`,则`pretrained_model`参数需要指定为`output/ResNet50_vd/19/ppcls`,PaddleClas会自动补齐`.pdparams`的后缀。
## 三、模型推理 ## 三、模型推理
PaddlePaddle提供三种方式进行预测推理,接下来介绍如何用预测引擎进行推理: PaddlePaddle提供三种方式进行预测推理,接下来介绍如何用预测引擎进行推理:
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
__all__ = [ __all__ = [
...@@ -30,9 +29,19 @@ __all__ = [ ...@@ -30,9 +29,19 @@ __all__ = [
class MobileNetV3(): class MobileNetV3():
def __init__(self, scale=1.0, model_name='small'): def __init__(self,
scale=1.0,
model_name='small',
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
self.scale = scale self.scale = scale
self.inplanes = 16 self.inplanes = 16
self.lr_mult_list = lr_mult_list
assert len(self.lr_mult_list) == 5, \
"lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format(
len(self.lr_mult_list))
self.curr_stage = 0
if model_name == "large": if model_name == "large":
self.cfg = [ self.cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
...@@ -54,6 +63,7 @@ class MobileNetV3(): ...@@ -54,6 +63,7 @@ class MobileNetV3():
] ]
self.cls_ch_squeeze = 960 self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280 self.cls_ch_expand = 1280
self.lr_interval = 3
elif model_name == "small": elif model_name == "small":
self.cfg = [ self.cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
...@@ -71,9 +81,10 @@ class MobileNetV3(): ...@@ -71,9 +81,10 @@ class MobileNetV3():
] ]
self.cls_ch_squeeze = 576 self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280 self.cls_ch_expand = 1280
self.lr_interval = 2
else: else:
raise NotImplementedError("mode[" + model_name + raise NotImplementedError(
"_model] is not implemented!") "mode[{}_model] is not implemented!".format(model_name))
def net(self, input, class_dim=1000): def net(self, input, class_dim=1000):
scale = self.scale scale = self.scale
...@@ -81,7 +92,7 @@ class MobileNetV3(): ...@@ -81,7 +92,7 @@ class MobileNetV3():
cfg = self.cfg cfg = self.cfg
cls_ch_squeeze = self.cls_ch_squeeze cls_ch_squeeze = self.cls_ch_squeeze
cls_ch_expand = self.cls_ch_expand cls_ch_expand = self.cls_ch_expand
#conv1 # conv1
conv = self.conv_bn_layer( conv = self.conv_bn_layer(
input, input,
filter_size=3, filter_size=3,
...@@ -107,6 +118,7 @@ class MobileNetV3(): ...@@ -107,6 +118,7 @@ class MobileNetV3():
name='conv' + str(i + 2)) name='conv' + str(i + 2))
inplanes = self.make_divisible(scale * layer_cfg[2]) inplanes = self.make_divisible(scale * layer_cfg[2])
i += 1 i += 1
self.curr_stage = i
conv = self.conv_bn_layer( conv = self.conv_bn_layer(
input=conv, input=conv,
...@@ -149,6 +161,10 @@ class MobileNetV3(): ...@@ -149,6 +161,10 @@ class MobileNetV3():
name=None, name=None,
use_cudnn=True, use_cudnn=True,
res_last_bn_init=False): res_last_bn_init=False):
lr_idx = self.curr_stage // self.lr_interval
lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
lr_mult = self.lr_mult_list[lr_idx]
conv = fluid.layers.conv2d( conv = fluid.layers.conv2d(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
...@@ -158,7 +174,8 @@ class MobileNetV3(): ...@@ -158,7 +174,8 @@ class MobileNetV3():
groups=num_groups, groups=num_groups,
act=None, act=None,
use_cudnn=use_cudnn, use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'), param_attr=ParamAttr(
name=name + '_weights', learning_rate=lr_mult),
bias_attr=False) bias_attr=False)
bn_name = name + '_bn' bn_name = name + '_bn'
bn = fluid.layers.batch_norm( bn = fluid.layers.batch_norm(
...@@ -189,6 +206,10 @@ class MobileNetV3(): ...@@ -189,6 +206,10 @@ class MobileNetV3():
return new_v return new_v
def se_block(self, input, num_out_filter, ratio=4, name=None): def se_block(self, input, num_out_filter, ratio=4, name=None):
lr_idx = self.curr_stage // self.lr_interval
lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
lr_mult = self.lr_mult_list[lr_idx]
num_mid_filter = num_out_filter // ratio num_mid_filter = num_out_filter // ratio
pool = fluid.layers.pool2d( pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False) input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
...@@ -197,15 +218,19 @@ class MobileNetV3(): ...@@ -197,15 +218,19 @@ class MobileNetV3():
filter_size=1, filter_size=1,
num_filters=num_mid_filter, num_filters=num_mid_filter,
act='relu', act='relu',
param_attr=ParamAttr(name=name + '_1_weights'), param_attr=ParamAttr(
bias_attr=ParamAttr(name=name + '_1_offset')) name=name + '_1_weights', learning_rate=lr_mult),
bias_attr=ParamAttr(
name=name + '_1_offset', learning_rate=lr_mult))
conv2 = fluid.layers.conv2d( conv2 = fluid.layers.conv2d(
input=conv1, input=conv1,
filter_size=1, filter_size=1,
num_filters=num_out_filter, num_filters=num_out_filter,
act='hard_sigmoid', act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'), param_attr=ParamAttr(
bias_attr=ParamAttr(name=name + '_2_offset')) name=name + '_2_weights', learning_rate=lr_mult),
bias_attr=ParamAttr(
name=name + '_2_offset', learning_rate=lr_mult))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0) scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale return scale
...@@ -275,8 +300,8 @@ def MobileNetV3_small_x0_75(): ...@@ -275,8 +300,8 @@ def MobileNetV3_small_x0_75():
return model return model
def MobileNetV3_small_x1_0(): def MobileNetV3_small_x1_0(**args):
model = MobileNetV3(model_name='small', scale=1.0) model = MobileNetV3(model_name='small', scale=1.0, **args)
return model return model
...@@ -300,8 +325,8 @@ def MobileNetV3_large_x0_75(): ...@@ -300,8 +325,8 @@ def MobileNetV3_large_x0_75():
return model return model
def MobileNetV3_large_x1_0(): def MobileNetV3_large_x1_0(**args):
model = MobileNetV3(model_name='large', scale=1.0) model = MobileNetV3(model_name='large', scale=1.0, **args)
return model return model
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册