diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml
index 2f42c00b528d9e508f0b3ddbfe611379bb9bf449..9f6c1fca6243476a19e07f29d9ccbe75a17ee1ec 100644
--- a/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml
+++ b/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml
index 34389fef0ccbe86e8e7603cedead2faca2aa819f..b183cc5d06334542f5043aca55fd02f20c165ce6 100644
--- a/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml
+++ b/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 1.3
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml
index 4666c66b141d3f1d1abffe3bd99b972135abf980..4714e773adc22d8eceb8345ab97818efbaaa846f 100644
--- a/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml
+++ b/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 1.3
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml b/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml
index b2c8353d886d7eeff653cdf840373275eb16a69c..e924d4ea9789bb7bbab91723d0166cfca2308358 100644
--- a/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml
+++ b/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml b/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml
index 2a1840b9f26b41a480accbbf5a5ad159a2f38d31..1eef02ebfe509ddfdc7bc47432dfd3402c6f69f2 100644
--- a/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml
+++ b/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.65
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml
index dca65b7d3a172ef393b67715d53f5b1b0b526653..bac36b97cd3e46704bcd69eac520087b201d63ea 100644
--- a/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml
+++ b/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml
index 36fd90f405f34aa15984db02953a6e743ebb27a5..324faac3a36b4c1d50e323a2f109144674221132 100644
--- a/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml
+++ b/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml
index bae9e83a11b93b570bd13ad5670fd58b8c5e4268..3a7a6cd415fff8a737929044547fa99ee2fbc0c7 100644
--- a/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml
+++ b/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml b/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml
index 141c42cf48383d114a42220d41d3b4ebce697757..ebd332262b9fbb907ba7c5a4b00622ac2980bedf 100644
--- a/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml
+++ b/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 2.6
         warmup_epoch: 5
diff --git a/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml b/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml
index e279ffb283dd921f386ffcfb5dc9fb532e6cb6b2..14f74c3e26cae8825cb2899af305007ed4f68f1e 100644
--- a/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml
+++ b/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml
@@ -15,7 +15,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 1.3
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2.yaml b/configs/ShuffleNet/ShuffleNetV2.yaml
index c097afaea5a97bd02cdc5d3eef236a71a3feb4b7..1ee8787c0fd504b23c858416d643b4724933a6f5 100644
--- a/configs/ShuffleNet/ShuffleNetV2.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.5
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_swish.yaml b/configs/ShuffleNet/ShuffleNetV2_swish.yaml
index 4e64ce8bb0562258c70234a2a7f888b0dbe08f8e..313f626fcd83491151a9496183290889e6e8e7dd 100644
--- a/configs/ShuffleNet/ShuffleNetV2_swish.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_swish.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.5
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml
index 996f040bbd3fac8bd7d8bce672379cda647f0714..a8e8055e67da82e76fc9e46d30bcb075d832d66a 100644
--- a/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.5
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml
index f2941474105a4352ef1294711ecbb865a19b4774..9e1814013e99f9b1cc3236e4bf10c53386ec7321 100644
--- a/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.5
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml
index 05a1ad3eb94cb71e16e15eefb1bd195dee4b143b..be8f0be06396257490d4def0d906244a4150b0bd 100644
--- a/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.5
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml b/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml
index 63f50d48404e1bb0b9b9c599418df19465087a99..a10ec37ca7af1afa830af25d3aa096a03f1febd1 100644
--- a/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.25
         warmup_epoch: 5
diff --git a/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml b/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml
index 5a14cebbf8e789a9e5c03d180f11bb0dc5a42e47..d84e29bc7075e73e44eb457cf32497c5b04671e2 100644
--- a/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml
+++ b/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml
@@ -14,7 +14,7 @@ topk: 5
 image_shape: [3, 224, 224]
 
 LEARNING_RATE:
-    function: 'CosineWarmup'
+    function: 'Cosine'
     params:
         lr: 0.25
         warmup_epoch: 5
diff --git a/docs/images/feature_maps/feature_visualization_input.jpg b/docs/images/feature_maps/feature_visualization_input.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..da9d1a756b17ebe07ea2ad5cec0657ea23d0ea1d
Binary files /dev/null and b/docs/images/feature_maps/feature_visualization_input.jpg differ
diff --git a/docs/images/feature_maps/feature_visualization_output.jpg b/docs/images/feature_maps/feature_visualization_output.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..18b99f96ff1f3e099d38bcd99db4ab1d3a77d95a
Binary files /dev/null and b/docs/images/feature_maps/feature_visualization_output.jpg differ
diff --git a/docs/zh_CN/feature_visiualization/get_started.md b/docs/zh_CN/feature_visiualization/get_started.md
new file mode 100644
index 0000000000000000000000000000000000000000..f80a2f848692fb691a1d3c00a70cf13e680fdaf0
--- /dev/null
+++ b/docs/zh_CN/feature_visiualization/get_started.md
@@ -0,0 +1,70 @@
+# Feature Map Visualization Guide
+
+## 1. Overview
+
+Feature maps are the representations of an input image inside a convolutional network. Studying them helps us understand and design models, so this dygraph-based tool is provided to visualize them.
+
+## 2. Preparation
+
+First, choose the model to study; this guide uses ResNet50. Copy resnet.py from the [model zoo](../../../ppcls/modeling/architecture/) into the current directory, open the [pretrained model list](../../zh_CN/models/models_intro), copy the link of the ResNet50 model, then download and extract the pretrained weights with the following commands.
+
+```bash
+wget The Link for Pretrained Model
+tar -xf Downloaded Pretrained Model
+```
+
+Taking ResNet50 as an example:
+```bash
+wget https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+tar -xf ResNet50_pretrained.tar
+```
+
+## 3. Modify the model
+
+Locate the feature map you want to inspect and assign it to self.fm so that it can be fetched. This guide uses the feature map right after the stem layer of ResNet50 as an example.
+
+Modify the model name in fm_vis.py.
+
+Define self.fm in the __init__ function of ResNet50:
+```python
+self.fm = None
+```
+Capture the feature map in the forward function of ResNet50:
+```python
+def forward(self, inputs):
+    y = self.conv(inputs)
+    # stash the stem feature map before max-pooling so it can be returned
+    self.fm = y
+    y = self.pool2d_max(y)
+    for bottleneck_block in self.bottleneck_block_list:
+        y = bottleneck_block(y)
+    y = self.pool2d_avg(y)
+    y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
+    y = self.out(y)
+    return y, self.fm
+```
+Run the visualization script (a fully filled-in example invocation is given at the end of this guide):
+```bash
+python tools/feature_maps_visualization/fm_vis.py -i the image you want to test \
+    -c channel_num -p pretrained model \
+    --show whether to show \
+    --interpolation interpolation method \
+    --save_path where to save \
+    --use_gpu whether to use gpu
+```
+Parameters:
++ `-i`: path of the image to predict, e.g. `./test.jpeg`
++ `-c`: channel of the feature map to visualize, e.g. `5`
++ `-p`: path of the pretrained weights, e.g. `./ResNet50_pretrained/`
++ `--show`: whether to display the result, default False
++ `--interpolation`: image interpolation method, default 1
++ `--save_path`: path to save the output, e.g. `./tools/`
++ `--use_gpu`: whether to use the GPU for prediction, default True
+
+## 4. Results
+Input image:
+
+![](../../images/feature_maps/feature_visualization_input.jpg)
+
+Output feature map:
+
+![](../../images/feature_maps/feature_visualization_output.jpg)
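+
+For concreteness, the command from section 3 can be filled in as follows; the channel index `5` and the paths are illustrative values drawn from the parameter list above, not defaults of the script:
+
+```bash
+python tools/feature_maps_visualization/fm_vis.py \
+    -i ./test.jpeg \
+    -c 5 \
+    -p ./ResNet50_pretrained/ \
+    --show True \
+    --interpolation 1 \
+    --save_path ./tools/ \
+    --use_gpu True
+```
diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py
index 5942b925e1e06ca561ec13a8f522b9fef6eb541e..ffc0851752f0808d33f352da9001c41c9a682576 100644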
--- a/ppcls/modeling/architectures/__init__.py
+++ b/ppcls/modeling/architectures/__init__.py
@@ -12,7 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .resnet_name import *
+from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
+from .resnet_vc import ResNet18_vc, ResNet34_vc, ResNet50_vc, ResNet101_vc, ResNet152_vc
+from .resnet_vd import ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd
+from .resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
+from .resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
+from .res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s, Res2Net200_26w_4s
+from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s
+from .se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd
+from .se_resnext_vd import SE_ResNeXt50_vd_32x4d, SENet154_vd
 from .dpn import DPN68
 from .densenet import DenseNet121
-from .hrnet import HRNet_W18_C
\ No newline at end of file
+from .hrnet import HRNet_W18_C
+from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
+from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
+from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
+from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
diff --git a/ppcls/modeling/architectures/mobilenet_v1.py b/ppcls/modeling/architectures/mobilenet_v1.py
index b968a916132e90db23ee46ef25921e23a9a851f6..5dc229d4c4e215333320b49a7a47296b704d580c 100644
--- a/ppcls/modeling/architectures/mobilenet_v1.py
+++ b/ppcls/modeling/architectures/mobilenet_v1.py
@@ -1,156 +1,51 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +import paddle import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout +from paddle.fluid.initializer import MSRA +import math __all__ = [ - 'MobileNetV1', 'MobileNetV1_x0_25', 'MobileNetV1_x0_5', 'MobileNetV1_x1_0', - 'MobileNetV1_x0_75' + "MobileNetV1_x0_25", "MobileNetV1_x0_5", "MobileNetV1_x0_75", "MobileNetV1" ] -class MobileNetV1(): - def __init__(self, scale=1.0): - self.scale = scale - - def net(self, input, class_dim=1000): - scale = self.scale - # conv1: 112x112 - input = self.conv_bn_layer( - input, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1, - name="conv1") - - # 56x56 - input = self.depthwise_separable( - input, - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale, - name="conv2_1") - - input = self.depthwise_separable( - input, - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=scale, - name="conv2_2") - - # 28x28 - input = self.depthwise_separable( - input, - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale, - name="conv3_1") - - input = self.depthwise_separable( - input, - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, - scale=scale, - name="conv3_2") - - # 14x14 - input = self.depthwise_separable( - input, - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale, - name="conv4_1") - - input = self.depthwise_separable( - input, - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=scale, - name="conv4_2") +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='relu', + use_cudnn=True, + name=None): + super(ConvBNLayer, self).__init__() - # 14x14 - for i in range(5): - input = self.depthwise_separable( - input, - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=scale, - name="conv5" + "_" + str(i + 1)) - # 7x7 - input = self.depthwise_separable( - input, - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, - scale=scale, - name="conv5_6") - - input = self.depthwise_separable( - input, - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=scale, - name="conv6") - - input = fluid.layers.pool2d( - input=input, pool_type='avg', global_pooling=True) - - output = fluid.layers.fc(input=input, - size=class_dim, - param_attr=ParamAttr( - initializer=MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - return output - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -161,58 
+56,214 @@ class MobileNetV1(): param_attr=ParamAttr( initializer=MSRA(), name=name + "_weights"), bias_attr=False) - bn_name = name + "_bn" - return fluid.layers.batch_norm( - input=conv, + + self._batch_norm = BatchNorm( + num_filters, act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def depthwise_separable(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): - depthwise_conv = self.conv_bn_layer( - input=input, - filter_size=3, + param_attr=ParamAttr(name + "_bn_scale"), + bias_attr=ParamAttr(name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class DepthwiseSeparable(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters1, + num_filters2, + num_groups, + stride, + scale, + name=None): + super(DepthwiseSeparable, self).__init__() + + self._depthwise_conv = ConvBNLayer( + num_channels=num_channels, num_filters=int(num_filters1 * scale), + filter_size=3, stride=stride, padding=1, num_groups=int(num_groups * scale), use_cudnn=False, name=name + "_dw") - pointwise_conv = self.conv_bn_layer( - input=depthwise_conv, + self._pointwise_conv = ConvBNLayer( + num_channels=int(num_filters1 * scale), filter_size=1, num_filters=int(num_filters2 * scale), stride=1, padding=0, name=name + "_sep") - return pointwise_conv + + def forward(self, inputs): + y = self._depthwise_conv(inputs) + y = self._pointwise_conv(y) + return y + + +class MobileNet(fluid.dygraph.Layer): + def __init__(self, scale=1.0, class_dim=1000): + super(MobileNet, self).__init__() + self.scale = scale + self.block_list = [] + + self.conv1 = ConvBNLayer( + num_channels=3, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1, + name="conv1") + + conv2_1 = self.add_sublayer( + "conv2_1", + sublayer=DepthwiseSeparable( + num_channels=int(32 * scale), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale, + name="conv2_1")) + self.block_list.append(conv2_1) + + conv2_2 = self.add_sublayer( + "conv2_2", + sublayer=DepthwiseSeparable( + num_channels=int(64 * scale), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale, + name="conv2_2")) + self.block_list.append(conv2_2) + + conv3_1 = self.add_sublayer( + "conv3_1", + sublayer=DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale, + name="conv3_1")) + self.block_list.append(conv3_1) + + conv3_2 = self.add_sublayer( + "conv3_2", + sublayer=DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale, + name="conv3_2")) + self.block_list.append(conv3_2) + + conv4_1 = self.add_sublayer( + "conv4_1", + sublayer=DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale, + name="conv4_1")) + self.block_list.append(conv4_1) + + conv4_2 = self.add_sublayer( + "conv4_2", + sublayer=DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale, + name="conv4_2")) + self.block_list.append(conv4_2) + + for i in range(5): + conv5 = self.add_sublayer( + 
"conv5_" + str(i + 1), + sublayer=DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale, + name="conv5_" + str(i + 1))) + self.block_list.append(conv5) + + conv5_6 = self.add_sublayer( + "conv5_6", + sublayer=DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale, + name="conv5_6")) + self.block_list.append(conv5_6) + + conv6 = self.add_sublayer( + "conv6", + sublayer=DepthwiseSeparable( + num_channels=int(1024 * scale), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale, + name="conv6")) + self.block_list.append(conv6) + + self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + + self.out = Linear( + int(1024 * scale), + class_dim, + param_attr=ParamAttr( + initializer=MSRA(), name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) + + def forward(self, inputs): + y = self.conv1(inputs) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, int(1024 * self.scale)]) + y = self.out(y) + return y -def MobileNetV1_x0_25(): - model = MobileNetV1(scale=0.25) +def MobileNetV1_x0_25(**args): + model = MobileNet(scale=0.25, **args) return model -def MobileNetV1_x0_5(): - model = MobileNetV1(scale=0.5) +def MobileNetV1_x0_5(**args): + model = MobileNet(scale=0.5, **args) return model -def MobileNetV1_x1_0(): - model = MobileNetV1(scale=1.0) +def MobileNetV1_x0_75(**args): + model = MobileNet(scale=0.75, **args) return model -def MobileNetV1_x0_75(): - model = MobileNetV1(scale=0.75) +def MobileNetV1(**args): + model = MobileNet(scale=1.0, **args) return model diff --git a/ppcls/modeling/architectures/mobilenet_v2.py b/ppcls/modeling/architectures/mobilenet_v2.py index 8abaa416f4554a50898299e3831cf350b6298cff..f8a5e62d420daf2060d000fc372d0394badaa291 100644 --- a/ppcls/modeling/architectures/mobilenet_v2.py +++ b/ppcls/modeling/architectures/mobilenet_v2.py @@ -1,104 +1,51 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function + +import numpy as np +import paddle import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - 'MobileNetV2_x0_25', 'MobileNetV2_x0_5' - 'MobileNetV2_x0_75', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', - 'MobileNetV2_x2_0', 'MobileNetV2' + "MobileNetV2_x0_25", "MobileNetV2_x0_5", "MobileNetV2_x0_75", + "MobileNetV2", "MobileNetV2_x1_5", "MobileNetV2_x2_0" ] -class MobileNetV2(): - def __init__(self, scale=1.0): - self.scale = scale - - def net(self, input, class_dim=1000): - scale = self.scale - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - #conv1 - input = self.conv_bn_layer( - input, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - if_act=True, - name='conv1_1') - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input = self.invresi_blocks( - input=input, - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name='conv' + str(i)) - in_c = int(c * scale) - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d( - input=input, pool_type='avg', global_pooling=True) - - output = fluid.layers.fc(input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - bias_attr=ParamAttr(name='fc10_offset')) - return output - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + name=None, + use_cudnn=True): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -106,125 +53,197 @@ class MobileNetV2(): groups=num_groups, act=None, use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), + param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs, if_act=True): + y = self._conv(inputs) + y = self._batch_norm(y) if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - 
num_expfilter = int(round(num_in_filter * expansion_factor)) + y = fluid.layers.relu6(y) + return y - channel_expand = self.conv_bn_layer( - input=input, + +class InvertedResidualUnit(fluid.dygraph.Layer): + def __init__(self, num_channels, num_in_filter, num_filters, stride, + filter_size, padding, expansion_factor, name): + super(InvertedResidualUnit, self).__init__() + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, num_filters=num_expfilter, filter_size=1, stride=1, padding=0, num_groups=1, - if_act=True, - name=name + '_expand') + name=name + "_expand") - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, num_filters=num_expfilter, filter_size=filter_size, stride=stride, padding=padding, num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) + use_cudnn=False, + name=name + "_dwise") - linear_out = self.conv_bn_layer( - input=bottleneck_conv, + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, num_filters=num_filters, filter_size=1, stride=1, padding=0, num_groups=1, - if_act=False, - name=name + '_linear') + name=name + "_linear") + + def forward(self, inputs, ifshortcut): + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out - else: - return linear_out - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block = self.inverted_residual_unit( - input=input, + y = fluid.layers.elementwise_add(inputs, y) + return y + + +class InvresiBlocks(fluid.dygraph.Layer): + def __init__(self, in_c, t, c, n, s, name): + super(InvresiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, num_in_filter=in_c, num_filters=c, - ifshortcut=False, stride=s, filter_size=3, padding=1, expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c + name=name + "_1") + self._block_list = [] for i in range(1, n): - last_residual_block = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block - - -def MobileNetV2_x0_25(): - model = MobileNetV2(scale=0.25) + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs): + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +class MobileNet(fluid.dygraph.Layer): + def __init__(self, class_dim=1000, scale=1.0): + super(MobileNet, self).__init__() + self.scale = scale + self.class_dim = class_dim + + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + self.conv1 = ConvBNLayer( + num_channels=3, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1, + name="conv1_1") + + self.block_list = [] + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + 
block = self.add_sublayer( + "conv" + str(i), + sublayer=InvresiBlocks( + in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s, + name="conv" + str(i))) + self.block_list.append(block) + in_c = int(c * scale) + + self.out_c = int(1280 * scale) if scale > 1.0 else 1280 + self.conv9 = ConvBNLayer( + num_channels=in_c, + num_filters=self.out_c, + filter_size=1, + stride=1, + padding=0, + name="conv9") + + self.pool2d_avg = Pool2D(pool_type="avg", global_pooling=True) + + self.out = Linear( + self.out_c, + class_dim, + param_attr=ParamAttr(name="fc10_weights"), + bias_attr=ParamAttr(name="fc10_offset")) + + def forward(self, inputs): + y = self.conv1(inputs, if_act=True) + for block in self.block_list: + y = block(y) + y = self.conv9(y, if_act=True) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.out_c]) + y = self.out(y) + return y + + +def MobileNetV2_x0_25(**args): + model = MobileNet(scale=0.25, **args) return model -def MobileNetV2_x0_5(): - model = MobileNetV2(scale=0.5) +def MobileNetV2_x0_5(**args): + model = MobileNet(scale=0.5, **args) return model -def MobileNetV2_x0_75(): - model = MobileNetV2(scale=0.75) +def MobileNetV2_x0_75(**args): + model = MobileNet(scale=0.75, **args) return model -def MobileNetV2_x1_0(): - model = MobileNetV2(scale=1.0) +def MobileNetV2(**args): + model = MobileNet(scale=1.0, **args) return model -def MobileNetV2_x1_5(): - model = MobileNetV2(scale=1.5) +def MobileNetV2_x1_5(**args): + model = MobileNet(scale=1.5, **args) return model -def MobileNetV2_x2_0(): - model = MobileNetV2(scale=2.0) +def MobileNetV2_x2_0(**args): + model = MobileNet(scale=2.0, **args) return model diff --git a/ppcls/modeling/architectures/mobilenet_v3.py b/ppcls/modeling/architectures/mobilenet_v3.py index d2efcedb105a9f95c30c9506cf683864203afbba..eb99dd21f5ad89b9b87f2f7222d41f06ce4d68e8 100644 --- a/ppcls/modeling/architectures/mobilenet_v3.py +++ b/ppcls/modeling/architectures/mobilenet_v3.py @@ -16,320 +16,342 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - 'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5', - 'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0', - 'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35', - 'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75', - 'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25' + "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", + "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0", + "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35", + "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75", + "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25" ] -class MobileNetV3(): - def __init__(self, - scale=1.0, - model_name='small', - lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]): - self.scale = scale - self.inplanes = 16 - - self.lr_mult_list = lr_mult_list - assert len(self.lr_mult_list) == 5, \ - "lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format( - len(self.lr_mult_list)) - self.curr_stage = 0 +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + +class MobileNetV3(fluid.dygraph.Layer): + def 
__init__(self, scale=1.0, model_name="small", class_dim=1000): + super(MobileNetV3, self).__init__() + + inplanes = 16 if model_name == "large": self.cfg = [ # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hard_swish', 2], - [3, 200, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 480, 112, True, 'hard_swish', 1], - [3, 672, 112, True, 'hard_swish', 1], - [5, 672, 160, True, 'hard_swish', 2], - [5, 960, 160, True, 'hard_swish', 1], - [5, 960, 160, True, 'hard_swish', 1], + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], + [3, 240, 80, False, "hard_swish", 2], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", 1], + [5, 672, 160, True, "hard_swish", 2], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", 1], ] self.cls_ch_squeeze = 960 self.cls_ch_expand = 1280 - self.lr_interval = 3 elif model_name == "small": self.cfg = [ # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hard_swish', 2], - [5, 240, 40, True, 'hard_swish', 1], - [5, 240, 40, True, 'hard_swish', 1], - [5, 120, 48, True, 'hard_swish', 1], - [5, 144, 48, True, 'hard_swish', 1], - [5, 288, 96, True, 'hard_swish', 2], - [5, 576, 96, True, 'hard_swish', 1], - [5, 576, 96, True, 'hard_swish', 1], + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hard_swish", 2], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], + [5, 288, 96, True, "hard_swish", 2], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], ] self.cls_ch_squeeze = 576 self.cls_ch_expand = 1280 - self.lr_interval = 2 else: raise NotImplementedError( "mode[{}_model] is not implemented!".format(model_name)) - def net(self, input, class_dim=1000): - scale = self.scale - inplanes = self.inplanes - cfg = self.cfg - cls_ch_squeeze = self.cls_ch_squeeze - cls_ch_expand = self.cls_ch_expand - # conv1 - conv = self.conv_bn_layer( - input, + self.conv1 = ConvBNLayer( + in_c=3, + out_c=make_divisible(inplanes * scale), filter_size=3, - num_filters=self.make_divisible(inplanes * scale), stride=2, padding=1, num_groups=1, if_act=True, - act='hard_swish', - name='conv1') + act="hard_swish", + name="conv1") + + self.block_list = [] i = 0 - inplanes = self.make_divisible(inplanes * scale) - for layer_cfg in cfg: - conv = self.residual_unit( - input=conv, - num_in_filter=inplanes, - num_mid_filter=self.make_divisible(scale * layer_cfg[1]), - num_out_filter=self.make_divisible(scale * layer_cfg[2]), - act=layer_cfg[4], - stride=layer_cfg[5], - filter_size=layer_cfg[0], - use_se=layer_cfg[3], - name='conv' + str(i + 2)) - inplanes = self.make_divisible(scale * layer_cfg[2]) + inplanes = make_divisible(inplanes * scale) + for (k, exp, c, se, nl, s) in self.cfg: + self.block_list.append( + ResidualUnit( + in_c=inplanes, + 
mid_c=make_divisible(scale * exp), + out_c=make_divisible(scale * c), + filter_size=k, + stride=s, + use_se=se, + act=nl, + name="conv" + str(i + 2))) + self.add_sublayer( + sublayer=self.block_list[-1], name="conv" + str(i + 2)) + inplanes = make_divisible(scale * c) i += 1 - self.curr_stage = i - conv = self.conv_bn_layer( - input=conv, + self.last_second_conv = ConvBNLayer( + in_c=inplanes, + out_c=make_divisible(scale * self.cls_ch_squeeze), filter_size=1, - num_filters=self.make_divisible(scale * cls_ch_squeeze), stride=1, padding=0, num_groups=1, if_act=True, - act='hard_swish', - name='conv_last') - conv = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True, use_cudnn=False) - conv = fluid.layers.conv2d( - input=conv, - num_filters=cls_ch_expand, + act="hard_swish", + name="conv_last") + + self.pool = Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + + self.last_conv = Conv2D( + num_channels=make_divisible(scale * self.cls_ch_squeeze), + num_filters=self.cls_ch_expand, filter_size=1, stride=1, padding=0, act=None, - param_attr=ParamAttr(name='last_1x1_conv_weights'), + param_attr=ParamAttr(name="last_1x1_conv_weights"), bias_attr=False) - conv = fluid.layers.hard_swish(conv) - drop = fluid.layers.dropout(x=conv, dropout_prob=0.2) - out = fluid.layers.fc(input=drop, - size=class_dim, - param_attr=ParamAttr(name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - return out - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - if_act=True, - act=None, - name=None, - use_cudnn=True, - res_last_bn_init=False): - lr_idx = self.curr_stage // self.lr_interval - lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) - lr_mult = self.lr_mult_list[lr_idx] - - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, + + self.out = Linear( + input_dim=self.cls_ch_expand, + output_dim=class_dim, + param_attr=ParamAttr("fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs, label=None, dropout_prob=0.2): + x = self.conv1(inputs) + for block in self.block_list: + x = block(x) + x = self.last_second_conv(x) + x = self.pool(x) + x = self.last_conv(x) + x = fluid.layers.hard_swish(x) + x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob) + x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.out(x) + + return x + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + in_c, + out_c, + filter_size, + stride, + padding, + num_groups=1, + if_act=True, + act=None, + use_cudnn=True, + name=""): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + self.conv = fluid.dygraph.Conv2D( + num_channels=in_c, + num_filters=out_c, filter_size=filter_size, stride=stride, padding=padding, groups=num_groups, - act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, use_cudnn=use_cudnn, + act=None) + self.bn = fluid.dygraph.BatchNorm( + num_channels=out_c, + act=None, param_attr=ParamAttr( - name=name + '_weights', learning_rate=lr_mult), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr( - name=bn_name + "_scale", + name=name + "_bn_scale", regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0)), bias_attr=ParamAttr( - name=bn_name + "_offset", + name=name + "_bn_offset", regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0)), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + 
'_variance') - if if_act: - if act == 'relu': - bn = fluid.layers.relu(bn) - elif act == 'hard_swish': - bn = fluid.layers.hard_swish(bn) - return bn - - def make_divisible(self, v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - def se_block(self, input, num_out_filter, ratio=4, name=None): - lr_idx = self.curr_stage // self.lr_interval - lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) - lr_mult = self.lr_mult_list[lr_idx] - - num_mid_filter = num_out_filter // ratio - pool = fluid.layers.pool2d( - input=input, pool_type='avg', global_pooling=True, use_cudnn=False) - conv1 = fluid.layers.conv2d( - input=pool, - filter_size=1, - num_filters=num_mid_filter, - act='relu', - param_attr=ParamAttr( - name=name + '_1_weights', learning_rate=lr_mult), - bias_attr=ParamAttr( - name=name + '_1_offset', learning_rate=lr_mult)) - conv2 = fluid.layers.conv2d( - input=conv1, - filter_size=1, - num_filters=num_out_filter, - act='hard_sigmoid', - param_attr=ParamAttr( - name=name + '_2_weights', learning_rate=lr_mult), - bias_attr=ParamAttr( - name=name + '_2_offset', learning_rate=lr_mult)) - scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0) - return scale - - def residual_unit(self, - input, - num_in_filter, - num_mid_filter, - num_out_filter, - stride, - filter_size, - act=None, - use_se=False, - name=None): - - conv0 = self.conv_bn_layer( - input=input, + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = fluid.layers.relu(x) + elif self.act == "hard_swish": + x = fluid.layers.hard_swish(x) + else: + print("The activation function is selected incorrectly.") + exit() + return x + + +class ResidualUnit(fluid.dygraph.Layer): + def __init__(self, + in_c, + mid_c, + out_c, + filter_size, + stride, + use_se, + act=None, + name=''): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, + out_c=mid_c, filter_size=1, - num_filters=num_mid_filter, stride=1, padding=0, if_act=True, act=act, - name=name + '_expand') - - conv1 = self.conv_bn_layer( - input=conv0, + name=name + "_expand") + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, filter_size=filter_size, - num_filters=num_mid_filter, stride=stride, padding=int((filter_size - 1) // 2), + num_groups=mid_c, if_act=True, act=act, - num_groups=num_mid_filter, - use_cudnn=False, - name=name + '_depthwise') - if use_se: - conv1 = self.se_block( - input=conv1, num_out_filter=num_mid_filter, name=name + '_se') - - conv2 = self.conv_bn_layer( - input=conv1, + name=name + "_depthwise") + if self.if_se: + self.mid_se = SEModule(mid_c, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_c=mid_c, + out_c=out_c, filter_size=1, - num_filters=num_out_filter, stride=1, padding=0, if_act=False, - name=name + '_linear', - res_last_bn_init=True) - if num_in_filter != num_out_filter or stride != 1: - return conv2 - else: - return fluid.layers.elementwise_add(x=input, y=conv2, act=None) + act=None, + name=name + "_linear") + + def forward(self, inputs): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = fluid.layers.elementwise_add(inputs, x) + 
return x + + +class SEModule(fluid.dygraph.Layer): + def __init__(self, channel, reduction=4, name=""): + super(SEModule, self).__init__() + self.avg_pool = fluid.dygraph.Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + self.conv1 = fluid.dygraph.Conv2D( + num_channels=channel, + num_filters=channel // reduction, + filter_size=1, + stride=1, + padding=0, + act="relu", + param_attr=ParamAttr(name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + self.conv2 = fluid.dygraph.Conv2D( + num_channels=channel // reduction, + num_filters=channel, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = self.conv2(outputs) + outputs = fluid.layers.hard_sigmoid(outputs) + return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0) -def MobileNetV3_small_x0_35(): - model = MobileNetV3(model_name='small', scale=0.35) +def MobileNetV3_small_x0_35(**args): + model = MobileNetV3(model_name="small", scale=0.35, **args) return model -def MobileNetV3_small_x0_5(): - model = MobileNetV3(model_name='small', scale=0.5) +def MobileNetV3_small_x0_5(**args): + model = MobileNetV3(model_name="small", scale=0.5, **args) return model -def MobileNetV3_small_x0_75(): - model = MobileNetV3(model_name='small', scale=0.75) +def MobileNetV3_small_x0_75(**args): + model = MobileNetV3(model_name="small", scale=0.75, **args) return model def MobileNetV3_small_x1_0(**args): - model = MobileNetV3(model_name='small', scale=1.0, **args) + model = MobileNetV3(model_name="small", scale=1.0, **args) return model -def MobileNetV3_small_x1_25(): - model = MobileNetV3(model_name='small', scale=1.25) +def MobileNetV3_small_x1_25(**args): + model = MobileNetV3(model_name="small", scale=1.25, **args) return model -def MobileNetV3_large_x0_35(): - model = MobileNetV3(model_name='large', scale=0.35) +def MobileNetV3_large_x0_35(**args): + model = MobileNetV3(model_name="large", scale=0.35, **args) return model -def MobileNetV3_large_x0_5(): - model = MobileNetV3(model_name='large', scale=0.5) +def MobileNetV3_large_x0_5(**args): + model = MobileNetV3(model_name="large", scale=0.5, **args) return model -def MobileNetV3_large_x0_75(): - model = MobileNetV3(model_name='large', scale=0.75) +def MobileNetV3_large_x0_75(**args): + model = MobileNetV3(model_name="large", scale=0.75, **args) return model def MobileNetV3_large_x1_0(**args): - model = MobileNetV3(model_name='large', scale=1.0, **args) + model = MobileNetV3(model_name="large", scale=1.0, **args) return model -def MobileNetV3_large_x1_25(): - model = MobileNetV3(model_name='large', scale=1.25) +def MobileNetV3_large_x1_25(**args): + model = MobileNetV3(model_name="large", scale=1.25, **args) return model diff --git a/ppcls/modeling/architectures/res2net.py b/ppcls/modeling/architectures/res2net.py index e6b11803627fd1b513fcc568e797e58081856041..ed4dcd8056f608912b9e73b8bf4646f7acb87d6a 100644 --- a/ppcls/modeling/architectures/res2net.py +++ b/ppcls/modeling/architectures/res2net.py @@ -1,47 +1,165 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np import paddle import paddle.fluid as fluid -import math from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - "Res2Net", "Res2Net50_48w_2s", "Res2Net50_26w_4s", "Res2Net50_14w_8s", - "Res2Net50_26w_6s", "Res2Net50_26w_8s", "Res2Net101_26w_4s", - "Res2Net152_26w_4s" + "Res2Net50_48w_2s", "Res2Net50_26w_4s", "Res2Net50_14w_8s", + "Res2Net50_48w_2s", "Res2Net50_26w_6s", "Res2Net50_26w_8s", + "Res2Net101_26w_4s", "Res2Net152_26w_4s", "Res2Net200_26w_4s" ] -class Res2Net(): - def __init__(self, layers=50, scales=4, width=26): +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels1, + num_channels2, + num_filters, + stride, + scales, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + self.stride = stride + self.scales = scales + self.conv0 = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1_list = [] + for s in range(scales - 1): + conv1 = self.add_sublayer( + name + '_branch2b_' + str(s + 1), + ConvBNLayer( + num_channels=num_filters // scales, + num_filters=num_filters // scales, + filter_size=3, + stride=stride, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + self.conv1_list.append(conv1) + self.pool2d_avg = Pool2D( + pool_size=3, pool_stride=stride, pool_padding=1, pool_type='avg') + + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_channels2, + filter_size=1, + act=None, + name=name 
+ "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_channels2, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + xs = fluid.layers.split(y, self.scales, 1) + ys = [] + for s, conv1 in enumerate(self.conv1_list): + if s == 0 or self.stride == 2: + ys.append(conv1(xs[s])) + else: + ys.append(conv1(xs[s] + ys[-1])) + if self.stride == 1: + ys.append(xs[-1]) + else: + ys.append(self.pool2d_avg(xs[-1])) + conv1 = fluid.layers.concat(ys, axis=1) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv2) + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class Res2Net(fluid.dygraph.Layer): + def __init__(self, layers=50, scales=4, width=26, class_dim=1000): + super(Res2Net, self).__init__() + self.layers = layers self.scales = scales self.width = width - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) basic_width = self.width * self.scales - num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] - num_filters2 = [256 * t for t in [1, 2, 4, 8]] + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) if layers == 50: depth = [3, 4, 6, 3] @@ -49,22 +167,25 @@ class Res2Net(): depth = [3, 4, 23, 3] elif layers == 152: depth = [3, 8, 36, 3] - conv = self.conv_bn_layer( - input=input, + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024] + num_channels2 = [256, 512, 1024, 2048] + num_filters = [basic_width * t for t in [1, 2, 4, 8]] + + self.conv1 = ConvBNLayer( + num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="conv1") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - + self.block_list = [] for block in range(len(depth)): + shortcut = False for i in range(depth[block]): if layers in [101, 152] and block == 2: if i == 0: @@ -73,153 +194,87 @@ class Res2Net(): conv_name = "res" + str(block + 2) + "b" + str(i) else: conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters1=num_filters1[block], - num_filters2=num_filters2[block], - stride=2 if i == 0 and block != 0 else 1, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels1=num_channels[block] + if i == 0 else num_channels2[block], + num_channels2=num_channels2[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + scales=scales, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + 
self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters1, num_filters2, stride, - name): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters1, - filter_size=1, - stride=1, - act='relu', - name=name + '_branch2a') - xs = fluid.layers.split(conv0, self.scales, 1) - ys = [] - for s in range(self.scales - 1): - if s == 0 or stride == 2: - ys.append( - self.conv_bn_layer( - input=xs[s], - num_filters=num_filters1 // self.scales, - stride=stride, - filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - else: - ys.append( - self.conv_bn_layer( - input=xs[s] + ys[-1], - num_filters=num_filters1 // self.scales, - stride=stride, - filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - if stride == 1: - ys.append(xs[-1]) - else: - ys.append( - fluid.layers.pool2d( - input=xs[-1], - pool_size=3, - pool_stride=stride, - pool_padding=1, - pool_type='avg')) - - conv1 = fluid.layers.concat(ys, axis=1) - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters2, - filter_size=1, - act=None, - name=name + "_branch2c") + name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv1(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def Res2Net50_48w_2s(**args): + model = Res2Net(layers=50, scales=2, width=48, **args) + return model - short = self.shortcut( - input, num_filters2, stride, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') +def Res2Net50_26w_4s(**args): + model = Res2Net(layers=50, scales=4, width=26, **args) + return model -def Res2Net50_48w_2s(): - model = Res2Net(layers=50, scales=2, width=48) +def Res2Net50_14w_8s(**args): + model = Res2Net(layers=50, scales=8, width=14, **args) return model -def Res2Net50_26w_4s(): - model = Res2Net(layers=50, scales=4, width=26) +def Res2Net50_48w_2s(**args): + model = Res2Net(layers=50, scales=2, width=48, **args) return model -def Res2Net50_14w_8s(): - model = Res2Net(layers=50, scales=8, width=14) +def Res2Net50_26w_6s(**args): + model = Res2Net(layers=50, scales=6, width=26, **args) return model -def Res2Net50_26w_6s(): - model = 
Res2Net(layers=50, scales=6, width=26) +def Res2Net50_26w_8s(**args): + model = Res2Net(layers=50, scales=8, width=26, **args) return model -def Res2Net50_26w_8s(): - model = Res2Net(layers=50, scales=8, width=26) +def Res2Net101_26w_4s(**args): + model = Res2Net(layers=101, scales=4, width=26, **args) return model -def Res2Net101_26w_4s(): - model = Res2Net(layers=101, scales=4, width=26) +def Res2Net152_26w_4s(**args): + model = Res2Net(layers=152, scales=4, width=26, **args) return model -def Res2Net152_26w_4s(): - model = Res2Net(layers=152, scales=4, width=26) +def Res2Net200_26w_4s(**args): + model = Res2Net(layers=200, scales=4, width=26, **args) return model diff --git a/ppcls/modeling/architectures/res2net_vd.py b/ppcls/modeling/architectures/res2net_vd.py index 464db8b71fd2b53258d6ed947129716741df1478..1cc397e87f5341ea158fcd839a493fdb278e905d 100644 --- a/ppcls/modeling/architectures/res2net_vd.py +++ b/ppcls/modeling/architectures/res2net_vd.py @@ -16,33 +16,158 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - "Res2Net_vd", "Res2Net50_vd_48w_2s", "Res2Net50_vd_26w_4s", - "Res2Net50_vd_14w_8s", "Res2Net50_vd_26w_6s", "Res2Net50_vd_26w_8s", + "Res2Net50_vd_48w_2s", "Res2Net50_vd_26w_4s", "Res2Net50_vd_14w_8s", + "Res2Net50_vd_26w_6s", "Res2Net50_vd_26w_8s", "Res2Net101_vd_26w_4s", "Res2Net152_vd_26w_4s", "Res2Net200_vd_26w_4s" ] -class Res2Net_vd(): - def __init__(self, layers=50, scales=4, width=26): +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( + pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels1, + num_channels2, + num_filters, + stride, + scales, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + self.stride = stride + self.scales = scales + self.conv0 = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1_list = [] + for s in range(scales - 1): + conv1 = self.add_sublayer( + name + '_branch2b_' + str(s + 1), + ConvBNLayer( + num_channels=num_filters // scales, + num_filters=num_filters // scales, + filter_size=3, + stride=stride, + 
act='relu', + name=name + '_branch2b_' + str(s + 1))) + self.conv1_list.append(conv1) + self.pool2d_avg = Pool2D( + pool_size=3, pool_stride=stride, pool_padding=1, pool_type='avg') + + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_channels2, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_channels2, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + xs = fluid.layers.split(y, self.scales, 1) + ys = [] + for s, conv1 in enumerate(self.conv1_list): + if s == 0 or self.stride == 2: + ys.append(conv1(xs[s])) + else: + ys.append(conv1(xs[s] + ys[-1])) + if self.stride == 1: + ys.append(xs[-1]) + else: + ys.append(self.pool2d_avg(xs[-1])) + conv1 = fluid.layers.concat(ys, axis=1) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv2) + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class Res2Net_vd(fluid.dygraph.Layer): + def __init__(self, layers=50, scales=4, width=26, class_dim=1000): + super(Res2Net_vd, self).__init__() + self.layers = layers self.scales = scales self.width = width - - def net(self, input, class_dim=1000): - layers = self.layers + basic_width = self.width * self.scales supported_layers = [50, 101, 152, 200] assert layers in supported_layers, \ "supported layers are {} but input layer is {}".format( supported_layers, layers) - basic_width = self.width * self.scales - num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] - num_filters2 = [256 * t for t in [1, 2, 4, 8]] + if layers == 50: depth = [3, 4, 6, 3] elif layers == 101: @@ -51,35 +176,37 @@ class Res2Net_vd(): depth = [3, 8, 36, 3] elif layers == 200: depth = [3, 12, 48, 3] - conv = self.conv_bn_layer( - input=input, + num_channels = [64, 256, 512, 1024] + num_channels2 = [256, 512, 1024, 2048] + num_filters = [basic_width * t for t in [1, 2, 4, 8]] + + self.conv1_1 = ConvBNLayer( + num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') + name="conv1_3") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.block_list = [] for block in range(len(depth)): + shortcut = False for i in range(depth[block]): if layers in [101, 152, 200] and block == 2: if i == 0: @@ -88,207 +215,89 @@ class Res2Net_vd(): conv_name = "res" + str(block + 2) + "b" + str(i) else: conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters1=num_filters1[block], - num_filters2=num_filters2[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - stdv = 1.0 / 
math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels1=num_channels[block] + if i == 0 else num_channels2[block], + num_channels2=num_channels2[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + scales=scales, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters1, num_filters2, stride, name, - if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters1, - filter_size=1, - stride=1, - act='relu', - name=name + '_branch2a') - - xs = fluid.layers.split(conv0, self.scales, 1) - ys = [] - for s in range(self.scales - 1): - if s == 0 or stride == 2: - ys.append( - self.conv_bn_layer( - input=xs[s], - num_filters=num_filters1 // self.scales, - stride=stride, - filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - else: - ys.append( - self.conv_bn_layer( - input=xs[s] + ys[-1], - num_filters=num_filters1 // self.scales, - stride=stride, - 
filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - - if stride == 1: - ys.append(xs[-1]) - else: - ys.append( - fluid.layers.pool2d( - input=xs[-1], - pool_size=3, - pool_stride=stride, - pool_padding=1, - pool_type='avg')) - - conv1 = fluid.layers.concat(ys, axis=1) - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters2, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters2, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def Res2Net50_vd_48w_2s(**args): + model = Res2Net_vd(layers=50, scales=2, width=48, **args) + return model -def Res2Net50_vd_48w_2s(): - model = Res2Net_vd(layers=50, scales=2, width=48) +def Res2Net50_vd_26w_4s(**args): + model = Res2Net_vd(layers=50, scales=4, width=26, **args) return model -def Res2Net50_vd_26w_4s(): - model = Res2Net_vd(layers=50, scales=4, width=26) +def Res2Net50_vd_14w_8s(**args): + model = Res2Net_vd(layers=50, scales=8, width=14, **args) return model -def Res2Net50_vd_14w_8s(): - model = Res2Net_vd(layers=50, scales=8, width=14) return model -def Res2Net50_vd_26w_6s(): - model = Res2Net_vd(layers=50, scales=6, width=26) +def Res2Net50_vd_26w_6s(**args): + model = Res2Net_vd(layers=50, scales=6, width=26, **args) return model -def Res2Net50_vd_26w_8s(): - model = Res2Net_vd(layers=50, scales=8, width=26) +def Res2Net50_vd_26w_8s(**args): + model = Res2Net_vd(layers=50, scales=8, width=26, **args) return model -def Res2Net101_vd_26w_4s(): - model = Res2Net_vd(layers=101, scales=4, width=26) +def Res2Net101_vd_26w_4s(**args): + model = Res2Net_vd(layers=101, scales=4, width=26, **args) return model -def Res2Net152_vd_26w_4s(): - model = Res2Net_vd(layers=152, scales=4, width=26) +def Res2Net152_vd_26w_4s(**args): + model = Res2Net_vd(layers=152, scales=4, width=26, **args) return model -def Res2Net200_vd_26w_4s(): - model = Res2Net_vd(layers=200, scales=4, width=26) +def Res2Net200_vd_26w_4s(**args): + model = Res2Net_vd(layers=200, scales=4, width=26, **args) return model diff --git a/ppcls/modeling/architectures/resnet.py b/ppcls/modeling/architectures/resnet.py index 7a137f8c7b93cfac538bc95db7173c8e80815ef6..925e8c3976d1e9d7e3771654545fbe0d2ef63c7f 100644 --- a/ppcls/modeling/architectures/resnet.py +++ b/ppcls/modeling/architectures/resnet.py @@ -12,19 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
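Every file in this patch follows the same conversion recipe, and the resnet.py rewrite below is its clearest instance: the static-graph class that assembled its network inside a net() method becomes a fluid.dygraph.Layer whose sublayers (ConvBNLayer, BottleneckBlock, BasicBlock) are built once in __init__, registered through add_sublayer when created in a loop so their parameters are tracked under stable names, and wired together in forward(); the fused act='relu' argument that fluid.layers.elementwise_add accepted in the static code is replaced by appending the activation through LayerHelper. A minimal, self-contained sketch of that recipe, using the same Paddle 1.x dygraph API the patch imports (TinyResBlock is a hypothetical name, not part of the patch):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.layer_helper import LayerHelper
    from paddle.fluid.dygraph.nn import BatchNorm, Conv2D

    class TinyResBlock(fluid.dygraph.Layer):
        # hypothetical minimal residual block mirroring the ConvBNLayer/BottleneckBlock recipe
        def __init__(self, channels):
            super(TinyResBlock, self).__init__()
            # sublayers are built once here so their parameters are registered with the Layer
            self._conv = Conv2D(num_channels=channels, num_filters=channels,
                                filter_size=3, padding=1, bias_attr=False)
            self._bn = BatchNorm(channels, act=None)

        def forward(self, inputs):
            # forward only routes data through the pre-built sublayers
            y = self._bn(self._conv(inputs))
            y = fluid.layers.elementwise_add(x=inputs, y=y)
            # the static code fused the ReLU via elementwise_add(act='relu');
            # the dygraph rewrite appends it through LayerHelper instead
            layer_helper = LayerHelper(self.full_name(), act='relu')
            return layer_helper.append_activation(y)

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.random.rand(1, 8, 16, 16).astype('float32'))
        print(TinyResBlock(8)(x).shape)  # [1, 8, 16, 16]
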
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout import math -__all__ = [ - "ResNet18", - "ResNet34", - "ResNet50", - "ResNet101", - "ResNet152", -] +__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] class ConvBNLayer(fluid.dygraph.Layer): @@ -34,7 +35,8 @@ class ConvBNLayer(fluid.dygraph.Layer): filter_size, stride=1, groups=1, - act=None): + act=None, + name=None): super(ConvBNLayer, self).__init__() self._conv = Conv2D( @@ -45,37 +47,54 @@ class ConvBNLayer(fluid.dygraph.Layer): padding=(filter_size - 1) // 2, groups=groups, act=None, + param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, act=act) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") def forward(self, inputs): y = self._conv(inputs) y = self._batch_norm(y) - return y class BottleneckBlock(fluid.dygraph.Layer): - def __init__(self, num_channels, num_filters, stride, shortcut=True): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): super(BottleneckBlock, self).__init__() self.conv0 = ConvBNLayer( num_channels=num_channels, num_filters=num_filters, filter_size=1, - act='relu') + act="relu", + name=name + "_branch2a") self.conv1 = ConvBNLayer( num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, - act='relu') + act="relu", + name=name + "_branch2b") self.conv2 = ConvBNLayer( num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, - act=None) + act=None, + name=name + "_branch2c") self.shortcut = shortcut @@ -84,7 +103,8 @@ class BottleneckBlock(fluid.dygraph.Layer): num_channels=num_channels, num_filters=num_filters * 4, filter_size=1, - stride=stride) + stride=stride, + name=name + "_branch1") self._num_channels_out = num_filters * 4 @@ -100,7 +120,54 @@ class BottleneckBlock(fluid.dygraph.Layer): y = fluid.layers.elementwise_add(x=short, y=conv2) - layer_helper = LayerHelper(self.full_name(), act='relu') + layer_helper = LayerHelper(self.full_name(), act="relu") + return layer_helper.append_activation(y) + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) 
+ y = fluid.layers.elementwise_add(x=short, y=conv1) + + layer_helper = LayerHelper(self.full_name(), act="relu") return layer_helper.append_activation(y) @@ -108,9 +175,15 @@ class ResNet(fluid.dygraph.Layer): def __init__(self, layers=50, class_dim=1000): super(ResNet, self).__init__() + self.layers = layers + supported_layers = [18, 34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + if layers == 18: depth = [2, 2, 2, 2] - elif layers == 18 or layers == 50: + elif layers == 34 or layers == 50: depth = [3, 4, 6, 3] elif layers == 101: depth = [3, 4, 23, 3] @@ -118,77 +191,105 @@ depth = [3, 8, 36, 3] else: raise ValueError('Input layer is not supported') - num_channels = [64, 256, 512, 1024] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] num_filters = [64, 128, 256, 512] self.conv = ConvBNLayer( num_channels=3, num_filters=64, filter_size=7, stride=2, - act='relu') + act="relu", + name="conv1") self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - self.bottleneck_block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut)) - self.bottleneck_block_list.append(bottleneck_block) - shortcut = True + pool_size=3, pool_stride=2, pool_padding=1, pool_type="max") + + self.block_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + conv_name, + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + conv_name, + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True self.pool2d_avg = Pool2D( pool_size=7, pool_type='avg', global_pooling=True) - self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1 + self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(2048 * 1.0) + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) self.out = Linear( - self.pool2d_avg_output, + self.pool2d_avg_channels, class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) def forward(self, inputs): y = self.conv(inputs) y = self.pool2d_max(y) - for bottleneck_block in self.bottleneck_block_list: - y = 
bottleneck_block(y) + for block in self.block_list: + y = block(y) y = self.pool2d_avg(y) - y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output]) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) y = self.out(y) return y -def ResNet18(**kwargs): - model = ResNet(layers=18, **kwargs) +def ResNet18(**args): + model = ResNet(layers=18, **args) return model -def ResNet34(**kwargs): - model = ResNet(layers=34, **kwargs) +def ResNet34(**args): + model = ResNet(layers=34, **args) return model -def ResNet50(**kwargs): - model = ResNet(layers=50, **kwargs) +def ResNet50(**args): + model = ResNet(layers=50, **args) return model -def ResNet101(**kwargs): - model = ResNet(layers=101, **kwargs) +def ResNet101(**args): + model = ResNet(layers=101, **args) return model -def ResNet152(**kwargs): - model = ResNet(layers=152, **kwargs) +def ResNet152(**args): + model = ResNet(layers=152, **args) return model diff --git a/ppcls/modeling/architectures/resnet_vc.py b/ppcls/modeling/architectures/resnet_vc.py index 36e7e59438f994e3c995a2ba654333f716a2241c..b568d0207f946fea589a49d9902a7d61fa478a83 100644 --- a/ppcls/modeling/architectures/resnet_vc.py +++ b/ppcls/modeling/architectures/resnet_vc.py @@ -1,127 +1,48 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
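The resnet_vc.py rewrite that follows reuses the classifier-head bookkeeping already seen above: the last stage emits num_channels[-1] * 2 feature channels (1024 expanded to 2048 for the bottleneck variants, 256 expanded to 512 for the 18/34 basic-block variants), and the fc weight is drawn from Uniform(-stdv, stdv) with stdv = 1/sqrt(fan_in). A quick sanity check of that arithmetic (an illustrative snippet, not part of the patch):

    import math

    # stage input widths exactly as in the diff above
    for layers, num_channels in [(50, [64, 256, 512, 1024]), (18, [64, 64, 128, 256])]:
        pool2d_avg_channels = num_channels[-1] * 2  # 2048 for ResNet50, 512 for ResNet18
        stdv = 1.0 / math.sqrt(pool2d_avg_channels * 1.0)
        print(layers, pool2d_avg_channels, round(stdv, 4))
    # prints "50 2048 0.0221" and "18 512 0.0442"
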
from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout -__all__ = ["ResNet", "ResNet50_vc", "ResNet101_vc", "ResNet152_vc"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - +import math -class ResNet(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers +__all__ = [ + "ResNet18_vc", "ResNet34_vc", "ResNet50_vc", "ResNet101_vc", "ResNet152_vc" +] - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - name="fc_0.w_0", - initializer=fluid.initializer.Uniform(-stdv, - stdv)), - bias_attr=ParamAttr(name="fc_0.b_0")) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -129,66 +50,264 @@ class ResNet(): groups=groups, act=None, param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') + bias_attr=False) if name == "conv1": bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, + self._batch_norm = BatchNorm( + num_filters, act=act, - name=bn_name + '.output.1', param_attr=ParamAttr(name=bn_name + '_scale'), bias_attr=ParamAttr(bn_name + '_offset'), 
moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', ) + moving_variance_name=bn_name + '_variance') - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): + super(BottleneckBlock, self).__init__() - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") - short = self.shortcut( - input, num_filters * 4, stride, name=name + "_branch1") + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv1) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class ResNet_vc(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000): + super(ResNet_vc, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] - return 
fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.block_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def ResNet18_vc(**args): + model = ResNet_vc(layers=18, **args) + return model + + +def ResNet34_vc(**args): + model = ResNet_vc(layers=34, **args) + return model -def ResNet50_vc(): - model = ResNet(layers=50) +def ResNet50_vc(**args): + model = ResNet_vc(layers=50, **args) return model -def ResNet101_vc(): - model = ResNet(layers=101) +def ResNet101_vc(**args): + model = ResNet_vc(layers=101, **args) return model -def ResNet152_vc(): - model = ResNet(layers=152) +def ResNet152_vc(**args): + model = ResNet_vc(layers=152, **args) return model diff --git a/ppcls/modeling/architectures/resnet_vd.py b/ppcls/modeling/architectures/resnet_vd.py index 6d0f3c4f31345e8c6d196b3ca4013f067dc806ce..9a7ba5fce1e61520dff3f00c9ce2c5124b1bb9a3 100644 --- a/ppcls/modeling/architectures/resnet_vd.py +++ b/ppcls/modeling/architectures/resnet_vd.py @@ -1,314 +1,333 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", - "ResNet152_vd", "ResNet200_vd" + "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", + "ResNet152_vd", "ResNet200_vd" ] -class ResNet(): - def __init__(self, - layers=50, - is_3x3=False, - postfix_name="", - lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]): - self.layers = layers - self.is_3x3 = is_3x3 - self.postfix_name = "" if postfix_name is None else postfix_name - self.lr_mult_list = lr_mult_list - assert len( - self.lr_mult_list - ) == 5, "lr_mult_list length in ResNet must be 5 but got {}!!".format( - len(self.lr_mult_list)) - self.curr_stage = 0 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - if layers >= 50: - for block in range(len(depth)): - self.curr_stage += 1 - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, 
- name=conv_name) - else: - for block in range(len(depth)): - self.curr_stage += 1 - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - name="fc_0.w_0" + self.postfix_name, - initializer=fluid.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_0.b_0" + self.postfix_name)) - - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - lr_mult = self.lr_mult_list[self.curr_stage] - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, act=None, - param_attr=ParamAttr(name=name + "_weights" + self.postfix_name), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale' + self.postfix_name), - bias_attr=ParamAttr(bn_name + '_offset' + self.postfix_name), - moving_mean_name=bn_name + '_mean' + self.postfix_name, - moving_variance_name=bn_name + '_variance' + self.postfix_name) - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - lr_mult = self.lr_mult_list[self.curr_stage] - pool = fluid.layers.pool2d( - input=input, + name=None, ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, - stride=1, + stride=stride, padding=(filter_size - 1) // 2, groups=groups, act=None, - param_attr=ParamAttr( - name=name + "_weights" + self.postfix_name, - learning_rate=lr_mult), + param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) if name == "conv1": bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, + self._batch_norm = BatchNorm( + num_filters, act=act, - param_attr=ParamAttr( - name=bn_name + '_scale' + self.postfix_name, - learning_rate=lr_mult), - bias_attr=ParamAttr( - bn_name + '_offset' + self.postfix_name, - learning_rate=lr_mult), - moving_mean_name=bn_name + '_mean' + self.postfix_name, - moving_variance_name=bn_name + '_variance' + self.postfix_name) - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + 
moving_variance_name=bn_name + '_variance') - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv2) - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) - def basic_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=3, - act='relu', stride=stride, + act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") - short = self.shortcut( - input, - num_filters, - stride, - if_first=if_first, - name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv1) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class ResNet_vd(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000): + super(ResNet_vd, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or 
layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') -def ResNet18_vd(): - model = ResNet(layers=18, is_3x3=True) + self.block_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def ResNet18_vd(**args): + model = ResNet_vd(layers=18, **args) return model -def ResNet34_vd(): - model = ResNet(layers=34, is_3x3=True) +def ResNet34_vd(**args): + model = ResNet_vd(layers=34, **args) return model def ResNet50_vd(**args): - model = ResNet(layers=50, is_3x3=True, **args) + model = ResNet_vd(layers=50, **args) return model -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) +def ResNet101_vd(**args): + model = ResNet_vd(layers=101, **args) return model -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) +def ResNet152_vd(**args): + model = ResNet_vd(layers=152, **args) return model -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) +def ResNet200_vd(**args): + model = ResNet_vd(layers=200, **args) return model diff --git 
a/ppcls/modeling/architectures/resnext.py b/ppcls/modeling/architectures/resnext.py index 2a5f46150f387c8e382ca34cf8acb451d0c77adc..c2200c6f35611708c48ada3b40bf24b1e559b2d0 100644 --- a/ppcls/modeling/architectures/resnext.py +++ b/ppcls/modeling/architectures/resnext.py @@ -1,108 +1,49 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - "ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", - "ResNeXt50_32x4d", "ResNeXt101_32x4d", "ResNeXt152_32x4d" + "ResNeXt50_32x4d", "ResNeXt50_64x4d", "ResNeXt101_32x4d", + "ResNeXt101_64x4d", "ResNeXt152_32x4d", "ResNeXt152_64x4d" ] -class ResNeXt(): - def __init__(self, layers=50, cardinality=64): - self.layers = layers - self.cardinality = cardinality - - def net(self, input, class_dim=1000): - layers = self.layers - cardinality = self.cardinality - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - - num_filters1 = [256, 512, 1024, 2048] - num_filters2 = [128, 256, 512, 1024] +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - name="res_conv1") #debug - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters1[block] - if cardinality == 64 else 
num_filters2[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -110,86 +51,192 @@ class ResNeXt(): groups=groups, act=None, param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') + bias_attr=False) if name == "conv1": bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, + self._batch_norm = BatchNorm( + num_filters, act=act, - name=bn_name + '.output.1', param_attr=ParamAttr(name=bn_name + '_scale'), bias_attr=ParamAttr(bn_name + '_offset'), moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', ) - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, cardinality, name): - cardinality = self.cardinality - conv0 = self.conv_bn_layer( - input=input, + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + shortcut=True, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, - stride=stride, groups=cardinality, + stride=stride, act='relu', name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters if cardinality == 64 else num_filters * 2, + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, filter_size=1, act=None, name=name + "_branch2c") - short = self.shortcut( - input, - num_filters if cardinality == 64 else num_filters * 2, - stride, - name=name + "_branch1") + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") +class ResNeXt(fluid.dygraph.Layer): + def __init__(self, 
layers=50, class_dim=1000, cardinality=32): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + self.conv = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="res_conv1") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') -def ResNeXt50_64x4d(): - model = ResNeXt(layers=50, cardinality=64) + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def ResNeXt50_32x4d(**args): + model = ResNeXt(layers=50, cardinality=32, **args) return model -def ResNeXt50_32x4d(): - model = ResNeXt(layers=50, cardinality=32) +def ResNeXt50_64x4d(**args): + model = ResNeXt(layers=50, cardinality=64, **args) return model -def ResNeXt101_64x4d(): - model = ResNeXt(layers=101, cardinality=64) +def ResNeXt101_32x4d(**args): + model = ResNeXt(layers=101, cardinality=32, **args) return model -def ResNeXt101_32x4d(): - model = ResNeXt(layers=101, cardinality=32) +def ResNeXt101_64x4d(**args): + model = ResNeXt(layers=101, cardinality=64, **args) return model -def ResNeXt152_64x4d(): - model = ResNeXt(layers=152, cardinality=64) +def ResNeXt152_32x4d(**args): + model = ResNeXt(layers=152, cardinality=32, **args) return model -def ResNeXt152_32x4d(): - model = ResNeXt(layers=152, cardinality=32) +def ResNeXt152_64x4d(**args): + model = ResNeXt(layers=152, cardinality=64, **args) return model diff --git a/ppcls/modeling/architectures/resnext_vd.py b/ppcls/modeling/architectures/resnext_vd.py index b0a2fe6c980c48a6f0fca65b12c8b8fce2c08fc6..0abb2419fc9b9b7dfe9fe3c1b8d4d460da2dd68f 100644 --- 
a/ppcls/modeling/architectures/resnext_vd.py +++ b/ppcls/modeling/architectures/resnext_vd.py @@ -1,130 +1,54 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + import math __all__ = [ - "ResNeXt", "ResNeXt50_vd_64x4d", "ResNeXt101_vd_64x4d", - "ResNeXt152_vd_64x4d", "ResNeXt50_vd_32x4d", "ResNeXt101_vd_32x4d", - "ResNeXt152_vd_32x4d" + "ResNeXt50_vd_32x4d", "ResNeXt50_vd_64x4d", "ResNeXt101_vd_32x4d", + "ResNeXt101_vd_64x4d", "ResNeXt152_vd_32x4d", "ResNeXt152_vd_64x4d" ] -class ResNeXt(): - def __init__(self, layers=50, is_3x3=False, cardinality=64): - self.layers = layers - self.is_3x3 = is_3x3 - self.cardinality = cardinality - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - cardinality = self.cardinality - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters1 = [256, 512, 1024, 2048] - num_filters2 = [128, 256, 512, 1024] - - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block 
== 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters1[block] - if cardinality == 64 else num_filters2[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - if_first=block == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( + pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -137,121 +61,209 @@ class ResNeXt(): bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, + self._batch_norm = BatchNorm( + num_filters, act=act, param_attr=ParamAttr(name=bn_name + '_scale'), bias_attr=ParamAttr(bn_name + '_offset'), moving_mean_name=bn_name + '_mean', moving_variance_name=bn_name + '_variance') - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - else: - return input - def bottleneck_block(self, input, num_filters, stride, cardinality, name, - if_first): - conv0 = self.conv_bn_layer( - input=input, +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, + groups=cardinality, stride=stride, act='relu', - groups=cardinality, 
name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters if cardinality == 64 else num_filters * 2, + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, filter_size=1, act=None, name=name + "_branch2c") - short = self.shortcut( - input, - num_filters if cardinality == 64 else num_filters * 2, - stride, - if_first=if_first, - name=name + "_branch1") + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class ResNeXt(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000, cardinality=32): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) - return 
fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y -def ResNeXt50_vd_64x4d(): - model = ResNeXt(layers=50, is_3x3=True) +def ResNeXt50_vd_32x4d(**args): + model = ResNeXt(layers=50, cardinality=32, **args) return model -def ResNeXt50_vd_32x4d(): - model = ResNeXt(layers=50, cardinality=32, is_3x3=True) +def ResNeXt50_vd_64x4d(**args): + model = ResNeXt(layers=50, cardinality=64, **args) return model -def ResNeXt101_vd_64x4d(): - model = ResNeXt(layers=101, is_3x3=True) +def ResNeXt101_vd_32x4d(**args): + model = ResNeXt(layers=101, cardinality=32, **args) return model -def ResNeXt101_vd_32x4d(): - model = ResNeXt(layers=101, cardinality=32, is_3x3=True) +def ResNeXt101_vd_64x4d(**args): + model = ResNeXt(layers=101, cardinality=64, **args) return model -def ResNeXt152_vd_64x4d(): - model = ResNeXt(layers=152, is_3x3=True) +def ResNeXt152_vd_32x4d(**args): + model = ResNeXt(layers=152, cardinality=32, **args) return model -def ResNeXt152_vd_32x4d(): - model = ResNeXt(layers=152, cardinality=32, is_3x3=True) +def ResNeXt152_vd_64x4d(**args): + model = ResNeXt(layers=152, cardinality=64, **args) return model diff --git a/ppcls/modeling/architectures/se_resnet_vd.py b/ppcls/modeling/architectures/se_resnet_vd.py index fbe96119830bc87ba19d084755f452c3c2a06372..7e25a80b2281fd3cf4e3e3de46df26648d808dee 100644 --- a/ppcls/modeling/architectures/se_resnet_vd.py +++ b/ppcls/modeling/architectures/se_resnet_vd.py @@ -1,188 +1,60 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
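The dygraph rewrite above replaces the old net(input) graph builders with fluid.dygraph.Layer subclasses, so the factories now return callable models. A minimal smoke test, not part of this patch (the import path and the guard/to_variable pattern are standard fluid dygraph usage, assumed here):

import numpy as np
import paddle.fluid as fluid
from ppcls.modeling.architectures.resnext_vd import ResNeXt50_vd_32x4d

with fluid.dygraph.guard():
    model = ResNeXt50_vd_32x4d(class_dim=1000)
    model.eval()
    # random NCHW batch matching the configs' image_shape [3, 224, 224]
    x = fluid.dygraph.to_variable(
        np.random.rand(1, 3, 224, 224).astype('float32'))
    logits = model(x)
    print(logits.shape)  # expect [1, 1000]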
from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout + +import math __all__ = [ - "SE_ResNet_vd", "SE_ResNet18_vd", "SE_ResNet34_vd", "SE_ResNet50_vd", - "SE_ResNet101_vd", "SE_ResNet152_vd", "SE_ResNet200_vd" + "SE_ResNet18_vd", "SE_ResNet34_vd", "SE_ResNet50_vd", "SE_ResNet101_vd", + "SE_ResNet152_vd", "SE_ResNet200_vd" ] -class SE_ResNet_vd(): - def __init__(self, layers=50, is_3x3=False): - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - reduction_ratio = 16 - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - reduction_ratio=reduction_ratio, - name=conv_name) - - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - reduction_ratio=reduction_ratio, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) - - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, 
+ groups=1, + is_vd_mode=False, act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + name=None, ): + super(ConvBNLayer, self).__init__() - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, - stride=1, + stride=stride, padding=(filter_size - 1) // 2, groups=groups, act=None, @@ -192,145 +64,325 @@ class SE_ResNet_vd(): bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, + self._batch_norm = BatchNorm( + num_filters, act=act, param_attr=ParamAttr(name=bn_name + '_scale'), bias_attr=ParamAttr(bn_name + '_offset'), moving_mean_name=bn_name + '_mean', moving_variance_name=bn_name + '_variance') - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first, - reduction_ratio): - conv0 = self.conv_bn_layer( - input=input, + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + reduction_ratio=16, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") - scale = self.squeeze_excitation( - input=conv2, + self.scale = SELayer( num_channels=num_filters * 4, + num_filters=num_filters * 4, reduction_ratio=reduction_ratio, name='fc_' + name) - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + self.shortcut = shortcut - def basic_block(self, input, num_filters, stride, name, if_first, - reduction_ratio): - conv0 = self.conv_bn_layer( - input=input, + def forward(self, 
inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + scale = self.scale(conv2) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=scale) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + reduction_ratio=16, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=3, - act='relu', stride=stride, + act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") - scale = self.squeeze_excitation( - input=conv1, + + self.scale = SELayer( num_channels=num_filters, + num_filters=num_filters, reduction_ratio=reduction_ratio, name='fc_' + name) - short = self.shortcut( - input, - num_filters, - stride, - if_first=if_first, - name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - - def squeeze_excitation(self, - input, - num_channels, - reduction_ratio, - name=None): - pool = fluid.layers.pool2d( - input=input, pool_size=0, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = fluid.layers.fc( - input=pool, - size=num_channels // reduction_ratio, - act='relu', - param_attr=fluid.param_attr.ParamAttr( + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + scale = self.scale(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=scale) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_sqz_weights'), + name=name + "_sqz_weights"), bias_attr=ParamAttr(name=name + '_sqz_offset')) - stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc( - input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_exc_weights'), + name=name + "_exc_weights"), bias_attr=ParamAttr(name=name + '_exc_offset')) - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + 
excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class SE_ResNet_vd(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000): + super(SE_ResNet_vd, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') -def SE_ResNet18_vd(): - model = SE_ResNet_vd(layers=18, is_3x3=True) + self.block_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc6_weights"), + bias_attr=ParamAttr(name="fc6_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def SE_ResNet18_vd(**args): + model = SE_ResNet_vd(layers=18, **args) return model -def SE_ResNet34_vd(): - model = SE_ResNet_vd(layers=34, is_3x3=True) +def SE_ResNet34_vd(**args): + model = SE_ResNet_vd(layers=34, **args) return model
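For reference, the SELayer introduced above computes a per-channel gate: global average pool, an FC+relu bottleneck of width num_channels // reduction_ratio, an FC+sigmoid back to full width, then a channel-wise rescale of the input. An equivalent NumPy sketch (weights as plain arrays, illustrative only):

import numpy as np

def se_recalibrate(x, w_sqz, b_sqz, w_exc, b_exc):
    # x: [N, C, H, W]; w_sqz: [C, C//r]; w_exc: [C//r, C]
    n, c = x.shape[0], x.shape[1]
    pool = x.mean(axis=(2, 3))                              # squeeze: [N, C]
    mid = np.maximum(pool.dot(w_sqz) + b_sqz, 0.0)          # FC + relu: [N, C//r]
    gate = 1.0 / (1.0 + np.exp(-(mid.dot(w_exc) + b_exc)))  # FC + sigmoid: [N, C]
    return x * gate.reshape(n, c, 1, 1)                     # excitation, broadcast over H, W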
-def SE_ResNet50_vd(): - model = SE_ResNet_vd(layers=50, is_3x3=True) +def SE_ResNet50_vd(**args): + model = SE_ResNet_vd(layers=50, **args) return model -def SE_ResNet101_vd(): - model = SE_ResNet_vd(layers=101, is_3x3=True) +def SE_ResNet101_vd(**args): + model = SE_ResNet_vd(layers=101, **args) return model -def SE_ResNet152_vd(): - model = SE_ResNet_vd(layers=152, is_3x3=True) +def SE_ResNet152_vd(**args): + model = SE_ResNet_vd(layers=152, **args) return model -def SE_ResNet200_vd(): - model = SE_ResNet_vd(layers=200, is_3x3=True) +def SE_ResNet200_vd(**args): + model = SE_ResNet_vd(layers=200, **args) return model diff --git a/ppcls/modeling/architectures/se_resnext_vd.py b/ppcls/modeling/architectures/se_resnext_vd.py index 8afb39415a1d7e1fbb6bf4e0581a8e858bdb2b21..5b3b47eab0ed52f44f03c0feaabe1412f071f4b6 100644 --- a/ppcls/modeling/architectures/se_resnext_vd.py +++ b/ppcls/modeling/architectures/se_resnext_vd.py @@ -1,329 +1,291 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
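The same ConvBNLayer pattern recurs in the se_resnext_vd.py rewrite below; its is_vd_mode flag selects the ResNet-D style downsampling shortcut, i.e. a 2x2 stride-2 average pool followed by a stride-1 1x1 conv instead of a strided 1x1 conv. A standalone sketch of just that path (hypothetical helper, not in this patch; BN omitted for brevity, ceil_mode=True as in resnext_vd.py, which only matters for odd spatial sizes):

import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D

class VDShortcut(fluid.dygraph.Layer):
    def __init__(self, in_channels, out_channels):
        super(VDShortcut, self).__init__()
        # halve the spatial size by pooling, not by striding the conv
        self._pool = Pool2D(pool_size=2, pool_stride=2, pool_padding=0,
                            pool_type='avg', ceil_mode=True)
        self._conv = Conv2D(num_channels=in_channels,
                            num_filters=out_channels,
                            filter_size=1, stride=1, bias_attr=False)

    def forward(self, x):
        return self._conv(self._pool(x))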
from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout -__all__ = [ - "SE_ResNeXt_vd", "SE_ResNeXt50_32x4d_vd", "SE_ResNeXt101_32x4d_vd", - "SENet154_vd" -] +import math +__all__ = ["SE_ResNeXt50_vd_32x4d", "SE_ResNeXt101_vd_32x4d", "SENet154_vd"] -class SE_ResNeXt_vd(): - def __init__(self, layers=50): - self.layers = layers - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - cardinality = 32 - reduction_ratio = 16 - depth = [3, 4, 6, 3] - num_filters = [128, 256, 512, 1024] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=128, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - elif layers == 101: - cardinality = 32 - reduction_ratio = 16 - depth = [3, 4, 23, 3] - num_filters = [128, 256, 512, 1024] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=128, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - elif layers == 152: - cardinality = 64 - reduction_ratio = 16 - depth = [3, 8, 36, 3] - num_filters = [256, 512, 1024, 2048] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=128, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, pool_size=3, pool_stride=2, pool_padding=1, \ - pool_type='max') - n = 1 if layers == 50 or layers == 101 else 3 - for block in range(len(depth)): - n += 1 - for i in range(depth[block]): - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio, - if_first=block == 0, - name=str(n) + '_' + str(i + 1)) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - if layers == 152: - pool = fluid.layers.dropout(x=pool, dropout_prob=0.2) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, +
filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() - return out + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( + pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + '_bn' + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - filter_size = 1 - if if_first: - return self.conv_bn_layer( - input, - ch_out, - filter_size, - stride, - name='conv' + name + '_prj') - else: - return self.conv_bn_layer_new( - input, - ch_out, - filter_size, - stride, - name='conv' + name + '_prj') - else: - return input - - def bottleneck_block(self, - input, - num_filters, - stride, - cardinality, - reduction_ratio, - if_first, - name=None): - conv0 = self.conv_bn_layer( - input=input, + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + reduction_ratio, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name='conv' + name + '_x1') - conv1 = self.conv_bn_layer( - input=conv0, + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, - stride=stride, groups=cardinality, + stride=stride, act='relu', name='conv' + name + '_x2') - if cardinality == 64: - num_filters = num_filters // 2 - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 2, + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, filter_size=1, act=None, name='conv' + name + '_x3') - scale = self.squeeze_excitation( - input=conv2, - num_channels=num_filters * 2, + self.scale = SELayer( + num_channels=num_filters * 2 if cardinality == 32 else num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, reduction_ratio=reduction_ratio, - name='fc' + name) - - short = self.shortcut( - input, num_filters * 2, stride, if_first=if_first, name=name) - - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False, - param_attr=ParamAttr(name=name + '_weights'), ) - bn_name = name + "_bn" - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + name='fc_' + name) - def conv_bn_layer_new(self, - 
input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = name + "_bn" - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name='conv' + name + '_prj') - def squeeze_excitation(self, - input, - num_channels, - reduction_ratio, - name=None): - pool = fluid.layers.pool2d( - input=input, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = fluid.layers.fc( - input=pool, - size=num_channels // reduction_ratio, - act='relu', - param_attr=fluid.param_attr.ParamAttr( + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + scale = self.scale(conv2) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=scale) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_sqz_weights'), + name=name + "_sqz_weights"), bias_attr=ParamAttr(name=name + '_sqz_offset')) - stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc( - input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_exc_weights'), + name=name + "_exc_weights"), bias_attr=ParamAttr(name=name + '_exc_offset')) - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class ResNeXt(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000, cardinality=32): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + self.reduction_ratio = 16 + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input 
layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [128, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.block_list = [] + n = 1 if layers == 50 or layers == 101 else 3 + for block in range(len(depth)): + n += 1 + shortcut = False + for i in range(depth[block]): + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + reduction_ratio=self.reduction_ratio, + shortcut=shortcut, + if_first=block == 0, + name=str(n) + '_' + str(i + 1))) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc6_weights"), + bias_attr=ParamAttr(name="fc6_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y -def SE_ResNeXt50_vd_32x4d(): - model = SE_ResNeXt_vd(layers=50) +def SE_ResNeXt50_vd_32x4d(**args): + model = ResNeXt(layers=50, cardinality=32, **args) return model -def SE_ResNeXt101_vd_32x4d(): - model = SE_ResNeXt_vd(layers=101) +def SE_ResNeXt101_vd_32x4d(**args): + model = ResNeXt(layers=101, cardinality=32, **args) return model -def SENet154_vd(): - model = SE_ResNeXt_vd(layers=152) +def SENet154_vd(**args): + model = ResNeXt(layers=152, cardinality=64, **args) return model diff --git a/ppcls/modeling/architectures/shufflenet_v2.py b/ppcls/modeling/architectures/shufflenet_v2.py index b63a26c8304575d135aad10520c0f4da5ffed80d..83979915cfc34130a18fbf180da8926fa7222dd4 100644 --- a/ppcls/modeling/architectures/shufflenet_v2.py +++ b/ppcls/modeling/architectures/shufflenet_v2.py @@ -1,127 +1,74 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - +import numpy as np +import paddle import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout +from paddle.fluid.initializer import MSRA +import math __all__ = [ - 'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', - 'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', - 'ShuffleNetV2' + "ShuffleNetV2_x0_25", "ShuffleNetV2_x0_33", "ShuffleNetV2_x0_5", + "ShuffleNetV2", "ShuffleNetV2_x1_5", "ShuffleNetV2_x2_0", + "ShuffleNetV2_swish" ] -class ShuffleNetV2(): - def __init__(self, scale=1.0): - self.scale = scale - - def net(self, input, class_dim=1000): - scale = self.scale - stage_repeats = [4, 8, 4] - - if scale == 0.25: - stage_out_channels = [-1, 24, 24, 48, 96, 512] - elif scale == 0.33: - stage_out_channels = [-1, 24, 32, 64, 128, 512] - elif scale == 0.5: - stage_out_channels = [-1, 24, 48, 96, 192, 1024] - elif scale == 1.0: - stage_out_channels = [-1, 24, 116, 232, 464, 1024] - elif scale == 1.5: - stage_out_channels = [-1, 24, 176, 352, 704, 1024] - elif scale == 2.0: - stage_out_channels = [-1, 24, 224, 488, 976, 2048] - else: - raise NotImplementedError("This scale size:[" + str(scale) + - "] is not implemented!") - #conv1 - - input_channel = stage_out_channels[1] - conv1 = self.conv_bn_layer( - input=input, - filter_size=3, - num_filters=input_channel, - padding=1, - stride=2, - name='stage1_conv') - pool1 = fluid.layers.pool2d( - input=conv1, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - conv = pool1 - # bottleneck sequences - for idxstage in range(len(stage_repeats)): - numrepeat = stage_repeats[idxstage] - output_channel = stage_out_channels[idxstage + 2] - for i in range(numrepeat): - if i == 0: - conv = self.inverted_residual_unit( - input=conv, - num_filters=output_channel, - stride=2, - benchmodel=2, - name=str(idxstage + 2) + '_' + str(i + 1)) - else: - conv = self.inverted_residual_unit( - input=conv, - num_filters=output_channel, - stride=1, - benchmodel=1, - name=str(idxstage + 2) + '_' + str(i + 1)) - - conv_last = self.conv_bn_layer( - input=conv, - filter_size=1, - num_filters=stage_out_channels[-1], - padding=0, - stride=1, - name='conv5') - pool_last = fluid.layers.pool2d( - input=conv_last, - pool_size=7, - pool_stride=1, - pool_padding=0, - pool_type='avg') - - output = fluid.layers.fc(input=pool_last, -
size=class_dim, - param_attr=ParamAttr( - initializer=MSRA(), name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) - return output - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - use_cudnn=True, - if_act=True, - name=None): - conv = fluid.layers.conv2d( - input=input, +def channel_shuffle(x, groups): + batchsize, num_channels, height, width = x.shape[0], x.shape[1], x.shape[ + 2], x.shape[3] + channels_per_group = num_channels // groups + + # reshape + x = fluid.layers.reshape( + x=x, shape=[batchsize, groups, channels_per_group, height, width]) + + x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4]) + # flatten + x = fluid.layers.reshape( + x=x, shape=[batchsize, num_channels, height, width]) + return x + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + if_act=True, + act='relu', + name=None, + use_cudnn=True): + super(ConvBNLayer, self).__init__() + self._if_act = if_act + assert act in ['relu', 'swish'], \ + "supported act are {} but your act is {}".format( + ['relu', 'swish'], act) + self._act = act + self._conv = Conv2D( + num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, @@ -130,178 +77,279 @@ class ShuffleNetV2(): act=None, use_cudnn=use_cudnn, param_attr=ParamAttr( - initializer=MSRA(), name=name + '_weights'), + initializer=MSRA(), name=name + "_weights"), bias_attr=False) - out = int((input.shape[2] - 1) / float(stride) + 1) - bn_name = name + '_bn' - if if_act: - return fluid.layers.batch_norm( - input=conv, - act='relu', - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - else: - return fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def channel_shuffle(self, x, groups): - batchsize, num_channels, height, width = x.shape[0], x.shape[ - 1], x.shape[2], x.shape[3] - channels_per_group = num_channels // groups - - # reshape - x = fluid.layers.reshape( - x=x, shape=[batchsize, groups, channels_per_group, height, width]) - - x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4]) - - # flatten - x = fluid.layers.reshape( - x=x, shape=[batchsize, num_channels, height, width]) - - return x - - def inverted_residual_unit(self, - input, - num_filters, - stride, - benchmodel, - name=None): - assert stride in [1, 2], \ - "supported stride are {} but your stride is {}".format([1,2], stride) + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs, if_act=True): + y = self._conv(inputs) + y = self._batch_norm(y) + if self._if_act: + y = fluid.layers.relu( + y) if self._act == 'relu' else fluid.layers.swish(y) + return y + + +class InvertedResidualUnit(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + benchmodel, + act='relu', + name=None): + super(InvertedResidualUnit, self).__init__() + assert stride in [1, 2], \ + "supported stride are {} but your stride is {}".format([ + 1, 2], stride) + self.benchmodel = benchmodel oup_inc = 
num_filters // 2 - inp = input.shape[1] - + inp = num_channels if benchmodel == 1: - x1, x2 = fluid.layers.split( - input, - num_or_sections=[input.shape[1] // 2, input.shape[1] // 2], - dim=1) - - conv_pw = self.conv_bn_layer( - input=x2, + self._conv_pw = ConvBNLayer( + num_channels=num_channels // 2, num_filters=oup_inc, filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, + act=act, name='stage_' + name + '_conv1') - - conv_dw = self.conv_bn_layer( - input=conv_pw, + self._conv_dw = ConvBNLayer( + num_channels=oup_inc, num_filters=oup_inc, filter_size=3, stride=stride, padding=1, num_groups=oup_inc, if_act=False, + act=act, use_cudnn=False, name='stage_' + name + '_conv2') - - conv_linear = self.conv_bn_layer( - input=conv_dw, + self._conv_linear = ConvBNLayer( + num_channels=oup_inc, num_filters=oup_inc, filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, + act=act, name='stage_' + name + '_conv3') - - out = fluid.layers.concat([x1, conv_linear], axis=1) - else: - #branch1 - conv_dw_1 = self.conv_bn_layer( - input=input, + # branch1 + self._conv_dw_1 = ConvBNLayer( + num_channels=num_channels, num_filters=inp, filter_size=3, stride=stride, padding=1, num_groups=inp, if_act=False, + act=act, use_cudnn=False, name='stage_' + name + '_conv4') - - conv_linear_1 = self.conv_bn_layer( - input=conv_dw_1, + self._conv_linear_1 = ConvBNLayer( + num_channels=inp, num_filters=oup_inc, filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, + act=act, name='stage_' + name + '_conv5') - - #branch2 - conv_pw_2 = self.conv_bn_layer( - input=input, + # branch2 + self._conv_pw_2 = ConvBNLayer( + num_channels=num_channels, num_filters=oup_inc, filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, + act=act, name='stage_' + name + '_conv1') - - conv_dw_2 = self.conv_bn_layer( - input=conv_pw_2, + self._conv_dw_2 = ConvBNLayer( + num_channels=oup_inc, num_filters=oup_inc, filter_size=3, stride=stride, padding=1, num_groups=oup_inc, if_act=False, + act=act, use_cudnn=False, name='stage_' + name + '_conv2') - - conv_linear_2 = self.conv_bn_layer( - input=conv_dw_2, + self._conv_linear_2 = ConvBNLayer( + num_channels=oup_inc, num_filters=oup_inc, filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, + act=act, name='stage_' + name + '_conv3') - out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - return self.channel_shuffle(out, 2) + def forward(self, inputs): + if self.benchmodel == 1: + x1, x2 = fluid.layers.split( + inputs, + num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], + dim=1) + x2 = self._conv_pw(x2) + x2 = self._conv_dw(x2) + x2 = self._conv_linear(x2) + out = fluid.layers.concat([x1, x2], axis=1) + else: + x1 = self._conv_dw_1(inputs) + x1 = self._conv_linear_1(x1) + + x2 = self._conv_pw_2(inputs) + x2 = self._conv_dw_2(x2) + x2 = self._conv_linear_2(x2) + out = fluid.layers.concat([x1, x2], axis=1) + + return channel_shuffle(out, 2) + + +class ShuffleNet(fluid.dygraph.Layer): + def __init__(self, class_dim=1000, scale=1.0, act='relu'): + super(ShuffleNet, self).__init__() + self.scale = scale + self.class_dim = class_dim + stage_repeats = [4, 8, 4] + + if scale == 0.25: + stage_out_channels = [-1, 24, 24, 48, 96, 512] + elif scale == 0.33: + stage_out_channels = [-1, 24, 32, 64, 128, 512] + elif scale == 0.5: + stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif scale == 1.0: + stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif scale == 1.5: + stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif scale == 
2.0: + stage_out_channels = [-1, 24, 224, 488, 976, 2048] + else: + raise NotImplementedError("This scale size:[" + str(scale) + + "] is not implemented!") + # 1. conv1 + self._conv1 = ConvBNLayer( + num_channels=3, + num_filters=stage_out_channels[1], + filter_size=3, + stride=2, + padding=1, + if_act=True, + act=act, + name='stage1_conv') + self._max_pool = Pool2D( + pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) + + # 2. bottleneck sequences + self._block_list = [] + i = 1 + in_c = int(32 * scale) + for idxstage in range(len(stage_repeats)): + numrepeat = stage_repeats[idxstage] + output_channel = stage_out_channels[idxstage + 2] + for i in range(numrepeat): + if i == 0: + block = self.add_sublayer( + str(idxstage + 2) + '_' + str(i + 1), + InvertedResidualUnit( + num_channels=stage_out_channels[idxstage + 1], + num_filters=output_channel, + stride=2, + benchmodel=2, + act=act, + name=str(idxstage + 2) + '_' + str(i + 1))) + self._block_list.append(block) + else: + block = self.add_sublayer( + str(idxstage + 2) + '_' + str(i + 1), + InvertedResidualUnit( + num_channels=output_channel, + num_filters=output_channel, + stride=1, + benchmodel=1, + act=act, + name=str(idxstage + 2) + '_' + str(i + 1))) + self._block_list.append(block) + + # 3. last_conv + self._last_conv = ConvBNLayer( + num_channels=stage_out_channels[-2], + num_filters=stage_out_channels[-1], + filter_size=1, + stride=1, + padding=0, + if_act=True, + act=act, + name='conv5') + + # 4. pool + self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + self._out_c = stage_out_channels[-1] + # 5. fc + self._fc = Linear( + stage_out_channels[-1], + class_dim, + param_attr=ParamAttr(name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + + def forward(self, inputs): + y = self._conv1(inputs) + y = self._max_pool(y) + for inv in self._block_list: + y = inv(y) + y = self._last_conv(y) + y = self._pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self._out_c]) + y = self._fc(y) + return y + + +def ShuffleNetV2_x0_25(**args): + model = ShuffleNet(scale=0.25, **args) + return model -def ShuffleNetV2_x0_25(): - model = ShuffleNetV2(scale=0.25) +def ShuffleNetV2_x0_33(**args): + model = ShuffleNet(scale=0.33, **args) return model -def ShuffleNetV2_x0_33(): - model = ShuffleNetV2(scale=0.33) +def ShuffleNetV2_x0_5(**args): + model = ShuffleNet(scale=0.5, **args) return model -def ShuffleNetV2_x0_5(): - model = ShuffleNetV2(scale=0.5) +def ShuffleNetV2(**args): + model = ShuffleNet(scale=1.0, **args) return model -def ShuffleNetV2_x1_0(): - model = ShuffleNetV2(scale=1.0) +def ShuffleNetV2_x1_5(**args): + model = ShuffleNet(scale=1.5, **args) return model -def ShuffleNetV2_x1_5(): - model = ShuffleNetV2(scale=1.5) +def ShuffleNetV2_x2_0(**args): + model = ShuffleNet(scale=2.0, **args) return model -def ShuffleNetV2_x2_0(): - model = ShuffleNetV2(scale=2.0) +def ShuffleNetV2_swish(**args): + model = ShuffleNet(scale=1.0, act='swish', **args) return model diff --git a/ppcls/modeling/architectures/shufflenet_v2_swish.py b/ppcls/modeling/architectures/shufflenet_v2_swish.py deleted file mode 100644 index 8683dfc08583fc9ebaf3cd550a2dff8863de2921..0000000000000000000000000000000000000000 --- a/ppcls/modeling/architectures/shufflenet_v2_swish.py +++ /dev/null @@ -1,293 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = [
-    'ShuffleNetV2_x0_5_swish', 'ShuffleNetV2_x1_0_swish',
-    'ShuffleNetV2_x1_5_swish', 'ShuffleNetV2_x2_0_swish', 'ShuffleNetV2_swish'
-]
-
-
-class ShuffleNetV2_swish():
-    def __init__(self, scale=1.0):
-        self.scale = scale
-
-    def net(self, input, class_dim=1000):
-        scale = self.scale
-        stage_repeats = [4, 8, 4]
-
-        if scale == 0.5:
-            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
-        elif scale == 1.0:
-            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
-        elif scale == 1.5:
-            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
-        elif scale == 2.0:
-            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
-        else:
-            raise ValueError("""{} groups is not supported for
-                       1x1 Grouped Convolutions""".format(num_groups))
-
-        #conv1
-
-        input_channel = stage_out_channels[1]
-        conv1 = self.conv_bn_layer(
-            input=input,
-            filter_size=3,
-            num_filters=input_channel,
-            padding=1,
-            stride=2,
-            name='stage1_conv')
-        pool1 = fluid.layers.pool2d(
-            input=conv1,
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
-        conv = pool1
-        # bottleneck sequences
-        for idxstage in range(len(stage_repeats)):
-            numrepeat = stage_repeats[idxstage]
-            output_channel = stage_out_channels[idxstage + 2]
-            for i in range(numrepeat):
-                if i == 0:
-                    conv = self.inverted_residual_unit(
-                        input=conv,
-                        num_filters=output_channel,
-                        stride=2,
-                        benchmodel=2,
-                        name=str(idxstage + 2) + '_' + str(i + 1))
-                else:
-                    conv = self.inverted_residual_unit(
-                        input=conv,
-                        num_filters=output_channel,
-                        stride=1,
-                        benchmodel=1,
-                        name=str(idxstage + 2) + '_' + str(i + 1))
-
-        conv_last = self.conv_bn_layer(
-            input=conv,
-            filter_size=1,
-            num_filters=stage_out_channels[-1],
-            padding=0,
-            stride=1,
-            name='conv5')
-        pool_last = fluid.layers.pool2d(
-            input=conv_last,
-            pool_size=7,
-            pool_stride=1,
-            pool_padding=0,
-            pool_type='avg')
-
-        output = fluid.layers.fc(input=pool_last,
-                                 size=class_dim,
-                                 param_attr=ParamAttr(
-                                     initializer=MSRA(), name='fc6_weights'),
-                                 bias_attr=ParamAttr(name='fc6_offset'))
-        return output
-
-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      num_groups=1,
-                      use_cudnn=True,
-                      if_act=True,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(
-                initializer=MSRA(), name=name + '_weights'),
-            bias_attr=False)
-        out = int((input.shape[2] - 1) / float(stride) + 1)
-        bn_name = name + '_bn'
-        if if_act:
-            return fluid.layers.batch_norm(
-                input=conv,
-                act='swish',
-                param_attr=ParamAttr(name=bn_name + "_scale"),
-                bias_attr=ParamAttr(name=bn_name + "_offset"),
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance')
-        else:
-            return fluid.layers.batch_norm(
-                input=conv,
-                param_attr=ParamAttr(name=bn_name + "_scale"),
-                bias_attr=ParamAttr(name=bn_name + "_offset"),
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance')
-
-    def channel_shuffle(self, x, groups):
-        batchsize, num_channels, height, width = x.shape[0], x.shape[
-            1], x.shape[2], x.shape[3]
-        channels_per_group = num_channels // groups
-
-        # reshape
-        x = fluid.layers.reshape(
-            x=x, shape=[batchsize, groups, channels_per_group, height, width])
-
-        x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4])
-
-        # flatten
-        x = fluid.layers.reshape(
-            x=x, shape=[batchsize, num_channels, height, width])
-
-        return x
-
-    def inverted_residual_unit(self,
-                               input,
-                               num_filters,
-                               stride,
-                               benchmodel,
-                               name=None):
-        assert stride in [1, 2], \
-            "supported stride are {} but your stride is {}".format([1, 2], stride)
-
-        oup_inc = num_filters // 2
-        inp = input.shape[1]
-
-        if benchmodel == 1:
-            x1, x2 = fluid.layers.split(
-                input,
-                num_or_sections=[input.shape[1] // 2, input.shape[1] // 2],
-                dim=1)
-
-            conv_pw = self.conv_bn_layer(
-                input=x2,
-                num_filters=oup_inc,
-                filter_size=1,
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                name='stage_' + name + '_conv1')
-
-            conv_dw = self.conv_bn_layer(
-                input=conv_pw,
-                num_filters=oup_inc,
-                filter_size=3,
-                stride=stride,
-                padding=1,
-                num_groups=oup_inc,
-                if_act=False,
-                use_cudnn=False,
-                name='stage_' + name + '_conv2')
-
-            conv_linear = self.conv_bn_layer(
-                input=conv_dw,
-                num_filters=oup_inc,
-                filter_size=1,
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                name='stage_' + name + '_conv3')
-
-            out = fluid.layers.concat([x1, conv_linear], axis=1)
-
-        else:
-            #branch1
-            conv_dw_1 = self.conv_bn_layer(
-                input=input,
-                num_filters=inp,
-                filter_size=3,
-                stride=stride,
-                padding=1,
-                num_groups=inp,
-                if_act=False,
-                use_cudnn=False,
-                name='stage_' + name + '_conv4')
-
-            conv_linear_1 = self.conv_bn_layer(
-                input=conv_dw_1,
-                num_filters=oup_inc,
-                filter_size=1,
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                name='stage_' + name + '_conv5')
-
-            #branch2
-            conv_pw_2 = self.conv_bn_layer(
-                input=input,
-                num_filters=oup_inc,
-                filter_size=1,
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                name='stage_' + name + '_conv1')
-
-            conv_dw_2 = self.conv_bn_layer(
-                input=conv_pw_2,
-                num_filters=oup_inc,
-                filter_size=3,
-                stride=stride,
-                padding=1,
-                num_groups=oup_inc,
-                if_act=False,
-                use_cudnn=False,
-                name='stage_' + name + '_conv2')
-
-            conv_linear_2 = self.conv_bn_layer(
-                input=conv_dw_2,
-                num_filters=oup_inc,
-                filter_size=1,
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                name='stage_' + name + '_conv3')
-            out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1)
-
-        return self.channel_shuffle(out, 2)
-
-
-def ShuffleNetV2_x0_5_swish():
-    model = ShuffleNetV2_swish(scale=0.5)
-    return model
-
-
-def ShuffleNetV2_x1_0_swish():
-    model = ShuffleNetV2_swish(scale=1.0)
-    return model
-
-
-def ShuffleNetV2_x1_5_swish():
-    model = ShuffleNetV2_swish(scale=1.5)
-    return model
-
-
-def ShuffleNetV2_x2_0_swish():
-    model = ShuffleNetV2_swish(scale=2.0)
-    return model
diff --git a/ppcls/modeling/architectures/vgg.py b/ppcls/modeling/architectures/vgg.py
index b439f267111fb93f1d4033bc6582156a86aae3c5..28845b3ec2d2dd22ab01ef3fba4ce478a922b764 100644
--- a/ppcls/modeling/architectures/vgg.py
+++ b/ppcls/modeling/architectures/vgg.py
@@ -106,7 +106,7 @@ class VGGNet(fluid.dygraph.Layer):
         x = self._conv_block_4(x)
         x = self._conv_block_5(x)
 
-        x = fluid.layers.flatten(x, axis=0)
+        x = fluid.layers.reshape(x, [0, -1])
         x = self._fc1(x)
         x = self._drop(x)
         x = self._fc2(x)
diff --git a/tools/eval.py b/tools/eval.py
index 291f77f05d1361cc0bc75ebbe23afcff15987005..c458951811a503391601860ba5a83aacd6211eb8 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -19,13 +19,14 @@ from __future__ import print_function
 
 import os
 import argparse
 
 import paddle.fluid as fluid
 
 import program
 
 from ppcls.data import Reader
 from ppcls.utils.config import get_config
 from ppcls.utils.save_load import init_model
+from ppcls.utils import logger
 
 from paddle.fluid.incubate.fleet.collective import fleet
 from paddle.fluid.incubate.fleet.base import role_maker
@@ -45,37 +46,26 @@ def parse_args():
         action='append',
         default=[],
         help='config options to be overridden')
-
     args = parser.parse_args()
     return args
 
 
 def main(args):
-    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
-    fleet.init(role)
-
     config = get_config(args.config, overrides=args.override, show=True)
-    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
+    # assign the place
+    gpu_id = fluid.dygraph.parallel.Env().dev_id
     place = fluid.CUDAPlace(gpu_id)
-
-    startup_prog = fluid.Program()
-    valid_prog = fluid.Program()
-    valid_dataloader, valid_fetchs = program.build(
-        config, valid_prog, startup_prog, is_train=False)
-    valid_prog = valid_prog.clone(for_test=True)
-
-    exe = fluid.Executor(place)
-    exe.run(startup_prog)
-
-    init_model(config, valid_prog, exe)
-
-    valid_reader = Reader(config, 'valid')()
-    valid_dataloader.set_sample_list_generator(valid_reader, place)
-
-    compiled_valid_prog = program.compile(config, valid_prog)
-    program.run(valid_dataloader, exe, compiled_valid_prog, valid_fetchs, -1,
-                'eval')
-
+    with fluid.dygraph.guard(place):
+        pre_weights_dict = fluid.dygraph.load_dygraph(config.pretrained_model)[0]
+        strategy = fluid.dygraph.parallel.prepare_context()
+        net = program.create_model(config.ARCHITECTURE, config.classes_num)
+        net = fluid.dygraph.parallel.DataParallel(net, strategy)
+        net.set_dict(pre_weights_dict)
+        valid_dataloader = program.create_dataloader()
+        valid_reader = Reader(config, 'valid')()
+        valid_dataloader.set_sample_list_generator(valid_reader, place)
+        net.eval()
+        top1_acc = program.run(valid_dataloader, config, net, None, 0, 'valid')
 
 
 if __name__ == '__main__':
     args = parse_args()
diff --git a/tools/feature_maps_visualization/download_resnet50_pretrained.sh b/tools/feature_maps_visualization/download_resnet50_pretrained.sh
new file mode 100644
index 0000000000000000000000000000000000000000..286c2400ae6423dcb65a6341a65c5c1c7256c540
--- /dev/null
+++ b/tools/feature_maps_visualization/download_resnet50_pretrained.sh
@@ -0,0 +1,2 @@
+wget https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+tar -xf ResNet50_pretrained.tar
\ No newline at end of file
diff --git a/tools/feature_maps_visualization/fm_vis.py b/tools/feature_maps_visualization/fm_vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..b389d833c947aa136791c13206b9c661b81a999e
--- /dev/null
+++ b/tools/feature_maps_visualization/fm_vis.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
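
The fm_vis.py script being added here builds its preprocessing as a list of callable operator objects and folds the raw image bytes through them; the real operators (`DecodeImage`, `ResizeImage`, `CropImage`, `NormalizeImage`, `ToTensor`) are defined in the utils.py file added further below. A minimal sketch of the same fold pattern, with hypothetical stand-in operators in place of the real ones:

    import numpy as np

    class CenterCrop(object):
        # stand-in for utils.CropImage: center-crop to a square of `size`
        def __init__(self, size):
            self.size = size

        def __call__(self, img):
            h, w = img.shape[:2]
            top, left = (h - self.size) // 2, (w - self.size) // 2
            return img[top:top + self.size, left:left + self.size]

    class Scale(object):
        # stand-in for the scaling step of utils.NormalizeImage
        def __init__(self, factor):
            self.factor = factor

        def __call__(self, img):
            return img.astype('float32') * self.factor

    def preprocess(data, ops):
        # same shape as fm_vis.preprocess: each op consumes the previous output
        for op in ops:
            data = op(data)
        return data

    img = np.ones([256, 320, 3], dtype='uint8')
    out = preprocess(img, [CenterCrop(224), Scale(1.0 / 255.0)])
    print(out.shape)  # (224, 224, 3)

Judging by the argument parser added below, the script would be driven along the lines of `python fm_vis.py -i demo.jpg -c 5 -p ResNet50_pretrained --show True --save_path fm.jpg` (flag names from that parser; the image path and channel index are illustrative).
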
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from resnet import ResNet50
+import paddle.fluid as fluid
+
+import numpy as np
+import cv2
+import utils
+import argparse
+
+def parse_args():
+    def str2bool(v):
+        return v.lower() in ("true", "t", "1")
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--image_file", type=str)
+    parser.add_argument("-c", "--channel_num", type=int)
+    parser.add_argument("-p", "--pretrained_model", type=str)
+    parser.add_argument("--show", type=str2bool, default=False)
+    parser.add_argument("--interpolation", type=int, default=1)
+    parser.add_argument("--save_path", type=str)
+    parser.add_argument("--use_gpu", type=str2bool, default=True)
+
+    return parser.parse_args()
+
+def create_operators(interpolation=1):
+    size = 224
+    img_mean = [0.485, 0.456, 0.406]
+    img_std = [0.229, 0.224, 0.225]
+    img_scale = 1.0 / 255.0
+
+    decode_op = utils.DecodeImage()
+    resize_op = utils.ResizeImage(resize_short=256, interpolation=interpolation)
+    crop_op = utils.CropImage(size=(size, size))
+    normalize_op = utils.NormalizeImage(
+        scale=img_scale, mean=img_mean, std=img_std)
+    totensor_op = utils.ToTensor()
+
+    return [decode_op, resize_op, crop_op, normalize_op, totensor_op]
+
+
+def preprocess(fname, ops):
+    data = open(fname, 'rb').read()
+    for op in ops:
+        data = op(data)
+
+    return data
+
+def main():
+    args = parse_args()
+    operators = create_operators(args.interpolation)
+    # assign the place
+    if args.use_gpu:
+        gpu_id = fluid.dygraph.parallel.Env().dev_id
+        place = fluid.CUDAPlace(gpu_id)
+    else:
+        place = fluid.CPUPlace()
+
+    pre_weights_dict = fluid.load_program_state(args.pretrained_model)
+    with fluid.dygraph.guard(place):
+        net = ResNet50()
+        data = preprocess(args.image_file, operators)
+        data = np.expand_dims(data, axis=0)
+        data = fluid.dygraph.to_variable(data)
+        dy_weights_dict = net.state_dict()
+        pre_weights_dict_new = {}
+        for key in dy_weights_dict:
+            weights_name = dy_weights_dict[key].name
+            pre_weights_dict_new[key] = pre_weights_dict[weights_name]
+        net.set_dict(pre_weights_dict_new)
+        net.eval()
+        _, fm = net(data)
+        assert args.channel_num >= 0 and args.channel_num < fm.shape[1], "the channel is out of the range, should be in {} but got {}".format([0, fm.shape[1]], args.channel_num)
+        fm = (np.squeeze(fm[0][args.channel_num].numpy()) * 255).astype(np.uint8)
+        if fm is not None:
+            if args.save_path:
+                cv2.imwrite(args.save_path, fm)
+            if args.show:
+                cv2.imshow("feature map", fm)
+                cv2.waitKey(0)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/feature_maps_visualization/resnet.py b/tools/feature_maps_visualization/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3f230da6d7d7763638bf5cf7fa3c4b7a9d7b882
--- /dev/null
+++ b/tools/feature_maps_visualization/resnet.py
@@ -0,0 +1,215 @@
+import numpy as np
+import argparse
+import ast
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.fluid.dygraph.base import to_variable
+
+from paddle.fluid import framework
+
+import math
+import sys
+import time
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(num_filters,
+                                     act=act,
+                                     param_attr=ParamAttr(name=bn_name + '_scale'),
+                                     bias_attr=ParamAttr(bn_name + '_offset'),
+                                     moving_mean_name=bn_name + '_mean',
+                                     moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+        self._num_channels_out = num_filters * 4
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = fluid.layers.elementwise_add(x=short, y=conv2)
+
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        return layer_helper.append_activation(y)
+
+
+class ResNet(fluid.dygraph.Layer):
+    def __init__(self, layers=50, class_dim=1000):
+        super(ResNet, self).__init__()
+
+        self.layers = layers
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+        self.fm = None
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_filters = [64, 128, 256, 512]
+
+        self.conv = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name="conv1")
+        self.pool2d_max = Pool2D(
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+        self.bottleneck_block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                if layers in [101, 152] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block]
+                        if i == 0 else num_filters[block] * 4,
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        shortcut=shortcut,
+                        name=conv_name))
+                self.bottleneck_block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = Pool2D(
+            pool_size=7, pool_type='avg', global_pooling=True)
+
+        self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1
+
+        stdv = 1.0 / math.sqrt(2048 * 1.0)
+
+        self.out = Linear(self.pool2d_avg_output,
+                          class_dim,
+                          param_attr=ParamAttr(
+                              initializer=fluid.initializer.Uniform(-stdv, stdv),
+                              name="fc_0.w_0"),
+                          bias_attr=ParamAttr(name="fc_0.b_0"))
+
+    def forward(self, inputs):
+        y = self.conv(inputs)
+        y = self.pool2d_max(y)
+        self.fm = y
+        for bottleneck_block in self.bottleneck_block_list:
+            y = bottleneck_block(y)
+        y = self.pool2d_avg(y)
+        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
+        y = self.out(y)
+        return y, self.fm
+
+
+def ResNet50(**args):
+    model = ResNet(layers=50, **args)
+    return model
+
+
+def ResNet101(**args):
+    model = ResNet(layers=101, **args)
+    return model
+
+
+def ResNet152(**args):
+    model = ResNet(layers=152, **args)
+    return model
+
+
+if __name__ == "__main__":
+    import numpy as np
+    place = fluid.CPUPlace()
+    with fluid.dygraph.guard(place):
+        model = ResNet50()
+        img = np.random.uniform(0, 255, [1, 3, 224, 224]).astype('float32')
+        img = fluid.dygraph.to_variable(img)
+        res, fm = model(img)
+        print(res.shape)
diff --git a/tools/feature_maps_visualization/utils.py b/tools/feature_maps_visualization/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c70149327748b5060332364e03a25adec8c3401
--- /dev/null
+++ b/tools/feature_maps_visualization/utils.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
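
The `main()` of fm_vis.py above bridges two checkpoint formats: `fluid.load_program_state` returns arrays keyed by static-graph parameter names, while a dygraph `state_dict()` is keyed by structured attribute paths, so the two are matched through each dygraph parameter's `.name`. That remapping, distilled into a reusable sketch (assuming the dygraph `ResNet50` above and an unpacked static checkpoint directory such as `ResNet50_pretrained`):

    import paddle.fluid as fluid
    from resnet import ResNet50  # the dygraph model defined above

    def load_static_weights(net, checkpoint_dir):
        # static checkpoint: {parameter_name: numpy array}
        static_state = fluid.load_program_state(checkpoint_dir)
        remapped = {}
        for structured_key, param in net.state_dict().items():
            # param.name is the static-graph name, e.g. 'conv1_weights'
            remapped[structured_key] = static_state[param.name]
        net.set_dict(remapped)
        return net

    if __name__ == '__main__':
        with fluid.dygraph.guard(fluid.CPUPlace()):
            net = load_static_weights(ResNet50(), 'ResNet50_pretrained')
            net.eval()

This only works because resnet.py pins every parameter name via `ParamAttr(name=...)` to match the released static checkpoints; without those explicit names the lookup by `param.name` would miss.
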
+
+import cv2
+import numpy as np
+
+
+class DecodeImage(object):
+    def __init__(self, to_rgb=True):
+        self.to_rgb = to_rgb
+
+    def __call__(self, img):
+        data = np.frombuffer(img, dtype='uint8')
+        img = cv2.imdecode(data, 1)
+        if self.to_rgb:
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
+                img.shape)
+            img = img[:, :, ::-1]
+
+        return img
+
+
+class ResizeImage(object):
+    def __init__(self, resize_short=None, interpolation=1):
+        self.resize_short = resize_short
+        self.interpolation = interpolation
+
+    def __call__(self, img):
+        img_h, img_w = img.shape[:2]
+        percent = float(self.resize_short) / min(img_w, img_h)
+        w = int(round(img_w * percent))
+        h = int(round(img_h * percent))
+        return cv2.resize(img, (w, h), interpolation=self.interpolation)
+
+
+class CropImage(object):
+    def __init__(self, size):
+        if type(size) is int:
+            self.size = (size, size)
+        else:
+            self.size = size
+
+    def __call__(self, img):
+        w, h = self.size
+        img_h, img_w = img.shape[:2]
+        w_start = (img_w - w) // 2
+        h_start = (img_h - h) // 2
+
+        w_end = w_start + w
+        h_end = h_start + h
+        return img[h_start:h_end, w_start:w_end, :]
+
+
+class NormalizeImage(object):
+    def __init__(self, scale=None, mean=None, std=None):
+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
+        std = std if std is not None else [0.229, 0.224, 0.225]
+
+        shape = (1, 1, 3)
+        self.mean = np.array(mean).reshape(shape).astype('float32')
+        self.std = np.array(std).reshape(shape).astype('float32')
+
+    def __call__(self, img):
+        return (img.astype('float32') * self.scale - self.mean) / self.std
+
+
+class ToTensor(object):
+    def __init__(self):
+        pass
+
+    def __call__(self, img):
+        img = img.transpose((2, 0, 1))
+        return img
diff --git a/tools/program.py b/tools/program.py
index 34541043623ea9c4f78387488203f57e7fa8a0c7..55900b98599ecc15055f7368a9fea9b1430e852f 100644
--- a/tools/program.py
+++ b/tools/program.py
@@ -329,9 +329,13 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
         feeds = create_feeds(batch, use_mix)
         fetchs = create_fetchs(feeds, net, config, mode)
         if mode == 'train':
-            avg_loss = net.scale_loss(fetchs['loss'])
-            avg_loss.backward()
-            net.apply_collective_grads()
+            if config["use_data_parallel"]:
+                avg_loss = net.scale_loss(fetchs['loss'])
+                avg_loss.backward()
+                net.apply_collective_grads()
+            else:
+                avg_loss = fetchs['loss']
+                avg_loss.backward()
 
             optimizer.minimize(avg_loss)
             net.clear_gradients()
diff --git a/tools/train.py b/tools/train.py
index c244dd490297afa1132a794f4a0f3b85578c7408..976136e359f6631235cc009ac90e7ee9b72e594e 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -52,10 +52,14 @@ def main(args):
     gpu_id = fluid.dygraph.parallel.Env().dev_id
     place = fluid.CUDAPlace(gpu_id)
 
+    use_data_parallel = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) != 1
+    config["use_data_parallel"] = use_data_parallel
+
     with fluid.dygraph.guard(place):
-        strategy = fluid.dygraph.parallel.prepare_context()
         net = program.create_model(config.ARCHITECTURE, config.classes_num)
-        net = fluid.dygraph.parallel.DataParallel(net, strategy)
+        if config["use_data_parallel"]:
+            strategy = fluid.dygraph.parallel.prepare_context()
+            net = fluid.dygraph.parallel.DataParallel(net, strategy)
 
         optimizer = program.create_optimizer(
             config, parameter_list=net.parameters())
@@ -79,7 +83,8 @@ def main(args):
             program.run(train_dataloader, config, net, optimizer, epoch_id,
                         'train')
 
-        if fluid.dygraph.parallel.Env().local_rank == 0:
+        if not config["use_data_parallel"] or fluid.dygraph.parallel.Env(
+        ).local_rank == 0:
             # 2. validate with validate dataset
             if config.validate and epoch_id % config.valid_interval == 0:
                 net.eval()
@@ -108,4 +113,4 @@ def main(args):
 
 if __name__ == '__main__':
     args = parse_args()
-    main(args)
\ No newline at end of file
+    main(args)
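
Taken together, the train.py and program.py changes make multi-card training opt-in: `paddle.distributed.launch` sets `PADDLE_TRAINERS_NUM`, so a plain `python tools/train.py` run skips `DataParallel`, loss scaling, and the collective gradient all-reduce entirely. The control flow, reduced to a self-contained sketch (a toy `Linear` model stands in for `program.create_model`, and `CPUPlace` is used so the single-card path runs anywhere; the real script always uses `CUDAPlace`):

    import os
    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import Linear

    # set by the distributed launcher; absent in a bare single-card run
    use_data_parallel = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) != 1

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if use_data_parallel else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        net = Linear(4, 2)  # stand-in for program.create_model(...)
        if use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            net = fluid.dygraph.parallel.DataParallel(net, strategy)

        optimizer = fluid.optimizer.SGD(learning_rate=0.1,
                                        parameter_list=net.parameters())
        x = fluid.dygraph.to_variable(
            np.random.rand(8, 4).astype('float32'))
        loss = fluid.layers.reduce_mean(net(x))
        if use_data_parallel:
            # scale the loss so gradients average correctly across trainers,
            # then fuse and all-reduce the gradients
            loss = net.scale_loss(loss)
            loss.backward()
            net.apply_collective_grads()
        else:
            loss.backward()
        optimizer.minimize(loss)
        net.clear_gradients()

The same `config["use_data_parallel"]` flag also gates the rank check before validation, so a single-card run validates unconditionally instead of consulting `local_rank`.
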