Unverified commit 68af310b authored by Nyakku Shigure, committed by GitHub

add MobileNetV3 (#38653)

* add mobilenetv3
Parent 767647ce
@@ -61,6 +61,8 @@ class TestPretrainedModel(unittest.TestCase):
arches = [
'mobilenet_v1',
'mobilenet_v2',
'mobilenet_v3_small',
'mobilenet_v3_large',
'squeezenet1_0',
'shufflenet_v2_x0_25',
]
@@ -40,6 +40,12 @@ class TestVisonModels(unittest.TestCase):
def test_mobilenetv1(self):
self.models_infer('mobilenet_v1')
def test_mobilenetv3_small(self):
self.models_infer('mobilenet_v3_small')
def test_mobilenetv3_large(self):
self.models_infer('mobilenet_v3_large')
def test_vgg11(self):
self.models_infer('vgg11')
@@ -40,6 +40,10 @@ from .models import MobileNetV1 # noqa: F401
from .models import mobilenet_v1 # noqa: F401
from .models import MobileNetV2 # noqa: F401
from .models import mobilenet_v2 # noqa: F401
from .models import MobileNetV3Small # noqa: F401
from .models import MobileNetV3Large # noqa: F401
from .models import mobilenet_v3_small # noqa: F401
from .models import mobilenet_v3_large # noqa: F401
from .models import SqueezeNet # noqa: F401
from .models import squeezenet1_0 # noqa: F401
from .models import squeezenet1_1 # noqa: F401
@@ -24,6 +24,10 @@ from .mobilenetv1 import MobileNetV1 # noqa: F401
from .mobilenetv1 import mobilenet_v1 # noqa: F401
from .mobilenetv2 import MobileNetV2 # noqa: F401
from .mobilenetv2 import mobilenet_v2 # noqa: F401
from .mobilenetv3 import MobileNetV3Small # noqa: F401
from .mobilenetv3 import MobileNetV3Large # noqa: F401
from .mobilenetv3 import mobilenet_v3_small # noqa: F401
from .mobilenetv3 import mobilenet_v3_large # noqa: F401
from .vgg import VGG # noqa: F401
from .vgg import vgg11 # noqa: F401
from .vgg import vgg13 # noqa: F401
@@ -79,6 +83,10 @@ __all__ = [ #noqa
'mobilenet_v1',
'MobileNetV2',
'mobilenet_v2',
'MobileNetV3Small',
'MobileNetV3Large',
'mobilenet_v3_small',
'mobilenet_v3_large',
'LeNet',
'DenseNet',
'densenet121',
@@ -12,14 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.utils.download import get_weights_path_from_url
from .utils import _make_divisible
__all__ = []
model_urls = {
@@ -29,16 +27,6 @@ model_urls = {
}
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self,
in_planes,
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url
from functools import partial
from .utils import _make_divisible
from ..ops import ConvNormActivation
__all__ = []
model_urls = {
"mobilenet_v3_small_x1.0":
("https://paddle-hapi.bj.bcebos.com/models/mobilenet_v3_small_x1.0.pdparams",
"34fe0e7c1f8b00b2b056ad6788d0590c"),
"mobilenet_v3_large_x1.0":
("https://paddle-hapi.bj.bcebos.com/models/mobilenet_v3_large_x1.0.pdparams",
"118db5792b4e183b925d8e8e334db3df"),
}
class SqueezeExcitation(nn.Layer):
"""
This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
This code is based on the torchvision code with modifications.
See also https://github.com/pytorch/vision/blob/main/torchvision/ops/misc.py#L127
Args:
input_channels (int): Number of channels in the input image
squeeze_channels (int): Number of squeeze channels
activation (Callable[..., paddle.nn.Layer], optional): ``delta`` activation. Default: ``paddle.nn.ReLU``
scale_activation (Callable[..., paddle.nn.Layer]): ``sigma`` activation. Default: ``paddle.nn.Sigmoid``
"""
def __init__(self,
input_channels,
squeeze_channels,
activation=nn.ReLU,
scale_activation=nn.Sigmoid):
super().__init__()
self.avgpool = nn.AdaptiveAvgPool2D(1)
self.fc1 = nn.Conv2D(input_channels, squeeze_channels, 1)
self.fc2 = nn.Conv2D(squeeze_channels, input_channels, 1)
self.activation = activation()
self.scale_activation = scale_activation()
def _scale(self, input):
scale = self.avgpool(input)
scale = self.fc1(scale)
scale = self.activation(scale)
scale = self.fc2(scale)
return self.scale_activation(scale)
def forward(self, input):
scale = self._scale(input)
return scale * input
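# Illustrative usage sketch (editor's addition, not part of this commit): the SE
# block pools the input to 1x1, produces per-channel gates via the two 1x1
# convolutions, and rescales the input channel-wise, so the output shape matches
# the input shape. The channel numbers below are arbitrary.
import paddle
se_block = SqueezeExcitation(input_channels=64, squeeze_channels=16)
se_in = paddle.rand([1, 64, 56, 56])
print(se_block(se_in).shape)  # [1, 64, 56, 56]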
class InvertedResidualConfig:
def __init__(self,
in_channels,
kernel,
expanded_channels,
out_channels,
use_se,
activation,
stride,
scale=1.0):
self.in_channels = self.adjust_channels(in_channels, scale=scale)
self.kernel = kernel
self.expanded_channels = self.adjust_channels(
expanded_channels, scale=scale)
self.out_channels = self.adjust_channels(out_channels, scale=scale)
self.use_se = use_se
if activation is None:
self.activation_layer = None
elif activation == "relu":
self.activation_layer = nn.ReLU
elif activation == "hardswish":
self.activation_layer = nn.Hardswish
else:
raise RuntimeError("The activation function is not supported: {}".
format(activation))
self.stride = stride
@staticmethod
def adjust_channels(channels, scale=1.0):
return _make_divisible(channels * scale, 8)
class InvertedResidual(nn.Layer):
def __init__(self, in_channels, expanded_channels, out_channels,
filter_size, stride, use_se, activation_layer, norm_layer):
super().__init__()
self.use_res_connect = stride == 1 and in_channels == out_channels
self.use_se = use_se
self.expand = in_channels != expanded_channels
if self.expand:
self.expand_conv = ConvNormActivation(
in_channels=in_channels,
out_channels=expanded_channels,
kernel_size=1,
stride=1,
padding=0,
norm_layer=norm_layer,
activation_layer=activation_layer)
self.bottleneck_conv = ConvNormActivation(
in_channels=expanded_channels,
out_channels=expanded_channels,
kernel_size=filter_size,
stride=stride,
padding=int((filter_size - 1) // 2),
groups=expanded_channels,
norm_layer=norm_layer,
activation_layer=activation_layer)
if self.use_se:
self.mid_se = SqueezeExcitation(
expanded_channels,
_make_divisible(expanded_channels // 4),
scale_activation=nn.Hardsigmoid)
self.linear_conv = ConvNormActivation(
in_channels=expanded_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
norm_layer=norm_layer,
activation_layer=None)
def forward(self, x):
identity = x
if self.expand:
x = self.expand_conv(x)
x = self.bottleneck_conv(x)
if self.use_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.use_res_connect:
x = paddle.add(identity, x)
return x
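# Illustrative usage sketch (editor's addition, not part of this commit): one
# inverted residual block with 1x1 expansion, 3x3 depthwise convolution, SE and
# a residual connection (stride 1 and in_channels == out_channels). The channel
# numbers are arbitrary.
import paddle
import paddle.nn as nn
ir_block = InvertedResidual(
    in_channels=40,
    expanded_channels=240,
    out_channels=40,
    filter_size=3,
    stride=1,
    use_se=True,
    activation_layer=nn.Hardswish,
    norm_layer=nn.BatchNorm2D)
ir_in = paddle.rand([1, 40, 28, 28])
print(ir_block(ir_in).shape)  # [1, 40, 28, 28]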
class MobileNetV3(nn.Layer):
"""MobileNetV3 model from
`"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
Args:
config (list[InvertedResidualConfig]): MobileNetV3 depthwise blocks config.
last_channel (int): The number of channels on the penultimate layer.
scale (float, optional): Scale of channels in each layer. Default: 1.0.
num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
"""
def __init__(self,
config,
last_channel,
scale=1.0,
num_classes=1000,
with_pool=True):
super().__init__()
self.config = config
self.scale = scale
self.last_channel = last_channel
self.num_classes = num_classes
self.with_pool = with_pool
self.firstconv_in_channels = config[0].in_channels
self.lastconv_in_channels = config[-1].in_channels
self.lastconv_out_channels = self.lastconv_in_channels * 6
norm_layer = partial(nn.BatchNorm2D, epsilon=0.001, momentum=0.99)
self.conv = ConvNormActivation(
in_channels=3,
out_channels=self.firstconv_in_channels,
kernel_size=3,
stride=2,
padding=1,
groups=1,
activation_layer=nn.Hardswish,
norm_layer=norm_layer)
self.blocks = nn.Sequential(*[
InvertedResidual(
in_channels=cfg.in_channels,
expanded_channels=cfg.expanded_channels,
out_channels=cfg.out_channels,
filter_size=cfg.kernel,
stride=cfg.stride,
use_se=cfg.use_se,
activation_layer=cfg.activation_layer,
norm_layer=norm_layer) for cfg in self.config
])
self.lastconv = ConvNormActivation(
in_channels=self.lastconv_in_channels,
out_channels=self.lastconv_out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
norm_layer=norm_layer,
activation_layer=nn.Hardswish)
if with_pool:
self.avgpool = nn.AdaptiveAvgPool2D(1)
if num_classes > 0:
self.classifier = nn.Sequential(
nn.Linear(self.lastconv_out_channels, self.last_channel),
nn.Hardswish(),
nn.Dropout(p=0.2),
nn.Linear(self.last_channel, num_classes))
def forward(self, x):
x = self.conv(x)
x = self.blocks(x)
x = self.lastconv(x)
if self.with_pool:
x = self.avgpool(x)
if self.num_classes > 0:
x = paddle.flatten(x, 1)
x = self.classifier(x)
return x
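# Illustrative sketch (editor's addition, not part of this commit): the base
# class is driven entirely by a list of InvertedResidualConfig entries; the tiny
# two-block config below is made up just to show the wiring. The final 1x1 conv
# takes config[-1].in_channels as its input width, so the last entry should keep
# in_channels == out_channels, as both preset configs do.
import paddle
tiny_config = [
    InvertedResidualConfig(16, 3, 64, 24, False, "relu", 2, 1.0),
    InvertedResidualConfig(24, 3, 72, 24, False, "relu", 1, 1.0),
]
tiny_net = MobileNetV3(tiny_config, last_channel=128, num_classes=10)
print(tiny_net(paddle.rand([1, 3, 224, 224])).shape)  # [1, 10]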
class MobileNetV3Small(MobileNetV3):
"""MobileNetV3 Small architecture model from
`"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
Args:
scale (float, optional): Scale of channels in each layer. Default: 1.0.
num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import MobileNetV3Small
# build model
model = MobileNetV3Small(scale=1.0)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
config = [
InvertedResidualConfig(16, 3, 16, 16, True, "relu", 2, scale),
InvertedResidualConfig(16, 3, 72, 24, False, "relu", 2, scale),
InvertedResidualConfig(24, 3, 88, 24, False, "relu", 1, scale),
InvertedResidualConfig(24, 5, 96, 40, True, "hardswish", 2, scale),
InvertedResidualConfig(40, 5, 240, 40, True, "hardswish", 1, scale),
InvertedResidualConfig(40, 5, 240, 40, True, "hardswish", 1, scale),
InvertedResidualConfig(40, 5, 120, 48, True, "hardswish", 1, scale),
InvertedResidualConfig(48, 5, 144, 48, True, "hardswish", 1, scale),
InvertedResidualConfig(48, 5, 288, 96, True, "hardswish", 2, scale),
InvertedResidualConfig(96, 5, 576, 96, True, "hardswish", 1, scale),
InvertedResidualConfig(96, 5, 576, 96, True, "hardswish", 1, scale),
]
last_channel = _make_divisible(1024 * scale, 8)
super().__init__(
config,
last_channel=last_channel,
scale=scale,
with_pool=with_pool,
num_classes=num_classes)
class MobileNetV3Large(MobileNetV3):
"""MobileNetV3 Large architecture model from
`"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
Args:
scale (float, optional): Scale of channels in each layer. Default: 1.0.
num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import MobileNetV3Large
# build model
model = MobileNetV3Large(scale=1.0)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
config = [
InvertedResidualConfig(16, 3, 16, 16, False, "relu", 1, scale),
InvertedResidualConfig(16, 3, 64, 24, False, "relu", 2, scale),
InvertedResidualConfig(24, 3, 72, 24, False, "relu", 1, scale),
InvertedResidualConfig(24, 5, 72, 40, True, "relu", 2, scale),
InvertedResidualConfig(40, 5, 120, 40, True, "relu", 1, scale),
InvertedResidualConfig(40, 5, 120, 40, True, "relu", 1, scale),
InvertedResidualConfig(40, 3, 240, 80, False, "hardswish", 2,
scale),
InvertedResidualConfig(80, 3, 200, 80, False, "hardswish", 1,
scale),
InvertedResidualConfig(80, 3, 184, 80, False, "hardswish", 1,
scale),
InvertedResidualConfig(80, 3, 184, 80, False, "hardswish", 1,
scale),
InvertedResidualConfig(80, 3, 480, 112, True, "hardswish", 1,
scale),
InvertedResidualConfig(112, 3, 672, 112, True, "hardswish", 1,
scale),
InvertedResidualConfig(112, 5, 672, 160, True, "hardswish", 2,
scale),
InvertedResidualConfig(160, 5, 960, 160, True, "hardswish", 1,
scale),
InvertedResidualConfig(160, 5, 960, 160, True, "hardswish", 1,
scale),
]
last_channel = _make_divisible(1280 * scale, 8)
super().__init__(
config,
last_channel=last_channel,
scale=scale,
with_pool=with_pool,
num_classes=num_classes)
def _mobilenet_v3(arch, pretrained=False, scale=1.0, **kwargs):
if arch == "mobilenet_v3_large":
model = MobileNetV3Large(scale=scale, **kwargs)
else:
model = MobileNetV3Small(scale=scale, **kwargs)
if pretrained:
arch = "{}_x{}".format(arch, scale)
assert (
arch in model_urls
), "{} model do not have a pretrained model now, you should set pretrained=False".format(
arch)
weight_path = get_weights_path_from_url(model_urls[arch][0],
model_urls[arch][1])
param = paddle.load(weight_path)
model.set_dict(param)
return model
def mobilenet_v3_small(pretrained=False, scale=1.0, **kwargs):
"""MobileNetV3 Small architecture model from
`"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
scale (float, optional): Scale of channels in each layer. Default: 1.0.
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import mobilenet_v3_small
# build model
model = mobilenet_v3_small()
# build model and load imagenet pretrained weight
# model = mobilenet_v3_small(pretrained=True)
# build mobilenet v3 small model with scale=0.5
model = mobilenet_v3_small(scale=0.5)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
model = _mobilenet_v3(
"mobilenet_v3_small", scale=scale, pretrained=pretrained, **kwargs)
return model
def mobilenet_v3_large(pretrained=False, scale=1.0, **kwargs):
"""MobileNetV3 Large architecture model from
`"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
scale (float, optional): Scale of channels in each layer. Default: 1.0.
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import mobilenet_v3_large
# build model
model = mobilenet_v3_large()
# build model and load imagenet pretrained weight
# model = mobilenet_v3_large(pretrained=True)
# build mobilenet v3 large model with scale=0.5
model = mobilenet_v3_large(scale=0.5)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
model = _mobilenet_v3(
"mobilenet_v3_large", scale=scale, pretrained=pretrained, **kwargs)
return model
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def _make_divisible(v, divisor=8, min_value=None):
"""
This function ensures that all layers have a channel number that is divisible by ``divisor``.
See also https://github.com/keras-team/keras/blob/8ecef127f70db723c158dbe9ed3268b3d610ab55/keras/applications/mobilenet_v2.py#L505
Args:
v (int|float): The original number of channels.
divisor (int): The divisor for the number of channels. Default: 8.
min_value (int, optional): The minimum value of the number of channels. If it is None,
the default is ``divisor``. Default: None.
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
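# Illustrative sketch (editor's addition, not part of this commit): the helper
# rounds a (possibly scaled) channel count to the nearest multiple of divisor,
# but bumps the result up one step whenever rounding would lose more than 10%.
print(_make_divisible(37, 8))         # 40 -> nearest multiple of 8
print(_make_divisible(32 * 0.35, 8))  # 16 -> 8 would drop >10% of 11.2, so bump to 16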
@@ -17,7 +17,7 @@ from ..fluid.layer_helper import LayerHelper
from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
from ..fluid import core, layers
from ..fluid.layers import nn, utils
from ..nn import Layer
from ..nn import Layer, Conv2D, Sequential, ReLU, BatchNorm2D
from ..fluid.initializer import Normal
from paddle.common_ops_import import *
@@ -1297,3 +1297,57 @@ class RoIAlign(Layer):
output_size=self._output_size,
spatial_scale=self._spatial_scale,
aligned=aligned)
class ConvNormActivation(Sequential):
"""
Configurable block used for Convolution-Normalization-Activation blocks.
This code is based on the torchvision code with modifications.
See also https://github.com/pytorch/vision/blob/main/torchvision/ops/misc.py#L68
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
kernel_size (int, optional): Size of the convolving kernel. Default: 3
stride (int, optional): Stride of the convolution. Default: 1
padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
norm_layer (Callable[..., paddle.nn.Layer], optional): Norm layer that will be stacked on top of the convolution layer.
If ``None`` this layer won't be used. Default: ``paddle.nn.BatchNorm2D``
activation_layer (Callable[..., paddle.nn.Layer], optional): Activation function which will be stacked on top of the normalization
layer (if not ``None``), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``paddle.nn.ReLU``
dilation (int): Spacing between kernel elements. Default: 1
bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
stride=1,
padding=None,
groups=1,
norm_layer=BatchNorm2D,
activation_layer=ReLU,
dilation=1,
bias=None):
if padding is None:
padding = (kernel_size - 1) // 2 * dilation
if bias is None:
bias = norm_layer is None
layers = [
Conv2D(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=dilation,
groups=groups,
bias_attr=bias)
]
if norm_layer is not None:
layers.append(norm_layer(out_channels))
if activation_layer is not None:
layers.append(activation_layer())
super().__init__(*layers)
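# Illustrative usage sketch (editor's addition, not part of this commit): a 3x3
# Conv2D followed by BatchNorm2D and ReLU; with the default padding rule the
# spatial size is preserved for stride 1, and the conv bias is dropped because a
# norm layer is present.
import paddle
cna = ConvNormActivation(in_channels=3, out_channels=16, kernel_size=3)
cna_in = paddle.rand([1, 3, 32, 32])
print(cna(cna_in).shape)  # [1, 16, 32, 32]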