diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index 507932b40d184ee352a602598244cf1523393234..96468b456042d46bbca2011fcd92d13b739ab2d6 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -377,7 +377,7 @@ Mixup相关介绍参考[mixup: Beyond Empirical Risk Minimization](https://arxiv 节点数*卡数|吞吐|加速比|test\_acc1|test\_acc5 ---|---|---|---|--- -1*1|1035 ins/s|1|0.75333|0.92702 +1*1|1035 ins/s|1|0.75333|0.92702 1*8|7840 ins/s|7.57|0.75603|0.92771 2*8|14277 ins/s|13.79|0.75872|0.92793 4*8|28594 ins/s|27.63|0.75253|0.92713 @@ -751,6 +751,18 @@ python -m paddle.distributed.launch train.py \ |[HRNet_W48_C](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar) | 78.95% | 94.42% | 30.064 | 19.963 | |[HRNet_W64_C](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar) | 79.30% | 94.61% | 38.921 | 24.742 | + +### ResNet_ACNet Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNet50_ACNet](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_ACNet_pretrained.tar)1 | 76.71% | 93.24% | 13.205 | 8.804 | +|[ResNet50_ACNet](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_ACNet_deploy_pretrained.tar)2 | 76.71% | 93.24% | 7.418 | 5.950 | + +* 注: + * `1`. 不对训练模型结果进行参数转换,进行评估。 + * `2`. 使用`sh ./utils/acnet/convert_model.sh`命令对训练模型结果进行参数转换,并设置`deploy mode=True`,进行评估。 + * `./utils/acnet/convert_model.sh`包含4个参数,分别是模型名称、输入的模型地址、输出的模型地址以及类别数量。 + ## FAQ **Q:** 加载预训练模型报错,Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. 
@@ -793,6 +805,7 @@ python -m paddle.distributed.launch train.py \ - Res2Net: [Res2Net: A New Multi-scale Backbone Architecture](https://arxiv.org/abs/1904.01169), Shang-Hua Gao, Ming-Ming Cheng, Kai Zhao, Xin-Yu Zhang, Ming-Hsuan Yang, Philip Torr - HRNet: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919), Jingdong Wang, Ke Sun, Tianheng Cheng, Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, Wenyu Liu, Bin Xiao - DARTS: [DARTS: Differentiable Architecture Search](https://arxiv.org/pdf/1806.09055.pdf), Hanxiao Liu, Karen Simonyan, Yiming Yang +- ACNet: [ACNet: Strengthening the Kernel Skeletons for Powerful CNN via Asymmetric Convolution Blocks](https://arxiv.org/abs/1908.03930), Xiaohan Ding, Yuchen Guo, Guiguang Ding, Jungong Han ## 版本更新 - 2018/12/03 **Stage1**: 更新AlexNet,ResNet50,ResNet101,MobileNetV1 diff --git a/PaddleCV/image_classification/README_en.md b/PaddleCV/image_classification/README_en.md index 437cd1ba01352650437e0cdbda2dcdb406568708..1baa86d963ccf8f3df39095085ba2fdd2eb796fb 100644 --- a/PaddleCV/image_classification/README_en.md +++ b/PaddleCV/image_classification/README_en.md @@ -265,7 +265,7 @@ Performing on Tesla V100 single machine with 8 cards, two machines with 16 cards nodes*crads|throughput|speedup|test\_acc1|test\_acc5 ---|---|---|---|--- -1*1|1035 ins/s|1|0.75333|0.92702 +1*1|1035 ins/s|1|0.75333|0.92702 1*8|7840 ins/s|7.57|0.75603|0.92771 2*8|14277 ins/s|13.79|0.75872|0.92793 4*8|28594 ins/s|27.63|0.75253|0.92713 @@ -627,6 +627,18 @@ Pretrained models can be downloaded by clicking related model names. 
|[HRNet_W48_C](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar) | 78.95% | 94.42% | 30.064 | 19.963 | |[HRNet_W64_C](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar) | 79.30% | 94.61% | 38.921 | 24.742 | +### ResNet_ACNet Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNet50_ACNet](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_ACNet_pretrained.tar)1 | 76.71% | 93.24% | 13.205 | 8.804 | +|[ResNet50_ACNet](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_ACNet_deploy_pretrained.tar)2 | 76.71% | 93.24% | 7.418 | 5.950 | + +* Note: + * `1`. deploy mode is set as False to eval. + * `2`. Use `sh ./utils/acnet/convert_model.sh` to convert the trained model, and set deploy mode as True to eval. + * `./utils/acnet/convert_model.sh` contains 4 parameters, which are model name, input model directory, output model directory and class number. + + ## FAQ **Q:** How to solve this problem when I try to train a 6-classes dataset with indicating pretrained_model parameter ? @@ -661,9 +673,12 @@ Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - EfficientNet: [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946), Mingxing Tan, Quoc V. 
Le - Res2Net: [Res2Net: A New Multi-scale Backbone Architecture](https://arxiv.org/abs/1904.01169), Shang-Hua Gao, Ming-Ming Cheng, Kai Zhao, Xin-Yu Zhang, Ming-Hsuan Yang, Philip Torr - HRNet: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919), Jingdong Wang, Ke Sun, Tianheng Cheng, Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, Wenyu Liu, Bin Xiao +- DARTS: [DARTS: Differentiable Architecture Search](https://arxiv.org/pdf/1806.09055.pdf), Hanxiao Liu, Karen Simonyan, Yiming Yang +- ACNet: [ACNet: Strengthening the Kernel Skeletons for Powerful CNN via Asymmetric Convolution Blocks](https://arxiv.org/abs/1908.03930), Xiaohan Ding, Yuchen Guo, Guiguang Ding, Jungong Han -## Update + +## Update - 2018/12/03 **Stage1**: Update AlexNet, ResNet50, ResNet101, MobileNetV1 - 2018/12/23 **Stage2**: Update VGG Series, SeResNeXt50_32x4d, SeResNeXt101_32x4d, ResNet152 - 2019/01/31 Update MobileNetV2_x1_0 diff --git a/PaddleCV/image_classification/eval.py b/PaddleCV/image_classification/eval.py index 7de1f09abc849f55c1476aa79dfdcb91d333a828..8d592006fda2d9af01daee3f0a6e32e1865a4cf3 100644 --- a/PaddleCV/image_classification/eval.py +++ b/PaddleCV/image_classification/eval.py @@ -52,6 +52,7 @@ add_arg('use_se', bool, True, "Whether to use Squeeze- add_arg('save_json_path', str, None, "Whether to save output in json file.") add_arg('same_feed', int, 0, "Whether to feed same images") add_arg('print_step', int, 1, "the batch step to print info") +add_arg('deploy', bool, False, "deploy mode, currently used in ACNet") # yapf: enable logging.basicConfig(level=logging.INFO) @@ -84,6 +85,8 @@ def eval(args): model = models.__dict__[args.model](is_test=True, padding_type=args.padding_type, use_se=args.use_se) + elif "ACNet" in args.model: + model = models.__dict__[args.model](deploy=args.deploy) else: model = models.__dict__[args.model]() diff --git 
a/PaddleCV/image_classification/models/__init__.py b/PaddleCV/image_classification/models/__init__.py index bb12a824d203f5f5698c62f7aa0ba381ca644405..63e6220688306b55d6dfe9369b4176c232314282 100644 --- a/PaddleCV/image_classification/models/__init__.py +++ b/PaddleCV/image_classification/models/__init__.py @@ -42,3 +42,4 @@ from .res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2N from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C from .autodl import DARTS_6M, DARTS_4M +from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet diff --git a/PaddleCV/image_classification/models/resnet_acnet.py b/PaddleCV/image_classification/models/resnet_acnet.py new file mode 100644 index 0000000000000000000000000000000000000000..575603382a2f8676d43d51bbbbe70c499c442b46 --- /dev/null +++ b/PaddleCV/image_classification/models/resnet_acnet.py @@ -0,0 +1,332 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "ResNet_ACNet", "ResNet18_ACNet", "ResNet34_ACNet", "ResNet50_ACNet", + "ResNet101_ACNet", "ResNet152_ACNet" +] + + +class ResNetACNet(object): + """ ACNet """ + + def __init__(self, layers=50, deploy=False): + """init""" + self.layers = layers + self.deploy = deploy + + def net(self, input, class_dim=1000): + """model""" + layers = self.layers + supported_layers = [18, 34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1") + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + is_first=block == i == 0, + name=conv_name) + + pool = fluid.layers.pool2d( + 
input=conv, pool_size=7, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) + return out + + def conv_bn_layer(self, **kwargs): + """ + conv_bn_layer + """ + if kwargs['filter_size'] == 1: + return self.conv_bn_layer_ori(**kwargs) + else: + return self.conv_bn_layer_ac(**kwargs) + + # conv bn+relu + def conv_bn_layer_ori(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + """ + standard convbn + used for 1x1 convbn in acnet + """ + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + # conv bn+relu + def conv_bn_layer_ac(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + """ ACNet conv bn """ + padding = (filter_size - 1) // 2 + + square_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + act=act if self.deploy else None, + param_attr=ParamAttr(name=name + "_acsquare_weights"), + bias_attr=ParamAttr(name=name + "_acsquare_bias") + if self.deploy else None, + name=name + '.acsquare.conv2d.output.1') + + if self.deploy: + return square_conv + else: + ver_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + 
filter_size=(filter_size, 1), + stride=stride, + padding=(padding, 0), + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_acver_weights"), + bias_attr=False, + name=name + '.acver.conv2d.output.1') + + hor_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=(1, filter_size), + stride=stride, + padding=(0, padding), + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_achor_weights"), + bias_attr=False, + name=name + '.achor.conv2d.output.1') + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + square_bn = fluid.layers.batch_norm( + input=square_conv, + act=None, + name=bn_name + '.acsquare.output.1', + param_attr=ParamAttr(name=bn_name + '_acsquare_scale'), + bias_attr=ParamAttr(bn_name + '_acsquare_offset'), + moving_mean_name=bn_name + '_acsquare_mean', + moving_variance_name=bn_name + '_acsquare_variance', ) + + ver_bn = fluid.layers.batch_norm( + input=ver_conv, + act=None, + name=bn_name + '.acver.output.1', + param_attr=ParamAttr(name=bn_name + '_acver_scale'), + bias_attr=ParamAttr(bn_name + '_acver_offset'), + moving_mean_name=bn_name + '_acver_mean', + moving_variance_name=bn_name + '_acver_variance', ) + + hor_bn = fluid.layers.batch_norm( + input=hor_conv, + act=None, + name=bn_name + '.achor.output.1', + param_attr=ParamAttr(name=bn_name + '_achor_scale'), + bias_attr=ParamAttr(bn_name + '_achor_offset'), + moving_mean_name=bn_name + '_achor_mean', + moving_variance_name=bn_name + '_achor_variance', ) + + return fluid.layers.elementwise_add( + x=square_bn, y=ver_bn + hor_bn, act=act) + + def shortcut(self, input, ch_out, stride, is_first, name): + """ shortcut """ + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1 or is_first == True: + return self.conv_bn_layer( + input=input, + num_filters=ch_out, + filter_size=1, + stride=stride, + name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name): + """" 
bottleneck_block """ + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters * 4, + stride, + is_first=False, + name=name + "_branch1") + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + def basic_block(self, input, num_filters, stride, is_first, name): + """ basic_block """ + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + short = self.shortcut( + input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + +def ResNet18_ACNet(deploy=False): + """ResNet18 + ACNet""" + model = ResNetACNet(layers=18, deploy=deploy) + return model + + +def ResNet34_ACNet(deploy=False): + """ResNet34 + ACNet""" + model = ResNetACNet(layers=34, deploy=deploy) + return model + + +def ResNet50_ACNet(deploy=False): + """ResNet50 + ACNet""" + model = ResNetACNet(layers=50, deploy=deploy) + return model + + +def ResNet101_ACNet(deploy=False): + """ResNet101 + ACNet""" + model = ResNetACNet(layers=101, deploy=deploy) + return model + + +def ResNet152_ACNet(deploy=False): + """ResNet152 + ACNet""" + model = ResNetACNet(layers=152, deploy=deploy) + return model diff --git a/PaddleCV/image_classification/scripts/train/ResNet50_ACNet.sh b/PaddleCV/image_classification/scripts/train/ResNet50_ACNet.sh new file mode 100644 index 
0000000000000000000000000000000000000000..4c83be5f773ea817d94339bcb04960458a94e727 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet50_ACNet.sh @@ -0,0 +1,15 @@ +##Training details +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet50: +python train.py \ + --model=ResNet50_ACNet \ + --batch_size=256 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/utils/acnet/convert_model.sh b/PaddleCV/image_classification/utils/acnet/convert_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..20f43b16d5a8a0d3bd16b07c6ba26499f275ca30 --- /dev/null +++ b/PaddleCV/image_classification/utils/acnet/convert_model.sh @@ -0,0 +1,5 @@ +python utils/acnet/weights_aggregator.py \ + ResNet50_ACNet \ + ./ResNet50ACNet_pretrained \ + ./ResNet50ACNet_pretrained_after_fuse \ + 1000 diff --git a/PaddleCV/image_classification/utils/acnet/weights_aggregator.py b/PaddleCV/image_classification/utils/acnet/weights_aggregator.py new file mode 100644 index 0000000000000000000000000000000000000000..cb2db23f3337b745ddd4d0e18987782a2294ed0c --- /dev/null +++ b/PaddleCV/image_classification/utils/acnet/weights_aggregator.py @@ -0,0 +1,198 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+#See the License for the specific language governing permissions and +#limitations under the License. +import sys +import os +import shutil +import logging + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +import models + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def get_ac_tensor(name): + gamma = fluid.global_scope().find_var(name + '_scale').get_tensor() + beta = fluid.global_scope().find_var(name + '_offset').get_tensor() + mean = fluid.global_scope().find_var(name + '_mean').get_tensor() + var = fluid.global_scope().find_var(name + '_variance').get_tensor() + return gamma, beta, mean, var + + +def get_kernel_bn_tensors(name): + if "conv1" in name: + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + ac_square = fluid.global_scope().find_var(name + + "_acsquare_weights").get_tensor() + ac_ver = fluid.global_scope().find_var(name + "_acver_weights").get_tensor() + ac_hor = fluid.global_scope().find_var(name + "_achor_weights").get_tensor() + + ac_square_bn_gamma, ac_square_bn_beta, ac_square_bn_mean, ac_square_bn_var = \ + get_ac_tensor(bn_name + '_acsquare') + ac_ver_bn_gamma, ac_ver_bn_beta, ac_ver_bn_mean, ac_ver_bn_var = \ + get_ac_tensor(bn_name + '_acver') + ac_hor_bn_gamma, ac_hor_bn_beta, ac_hor_bn_mean, ac_hor_bn_var = \ + get_ac_tensor(bn_name + '_achor') + + kernels = [np.array(ac_square), np.array(ac_ver), np.array(ac_hor)] + gammas = [ + np.array(ac_square_bn_gamma), np.array(ac_ver_bn_gamma), + np.array(ac_hor_bn_gamma) + ] + betas = [ + np.array(ac_square_bn_beta), np.array(ac_ver_bn_beta), + np.array(ac_hor_bn_beta) + ] + means = [ + np.array(ac_square_bn_mean), np.array(ac_ver_bn_mean), + np.array(ac_hor_bn_mean) + ] + var = [ + np.array(ac_square_bn_var), np.array(ac_ver_bn_var), + np.array(ac_hor_bn_var) + ] + + return {"kernels": kernels, "bn": (gammas, betas, means, var)} + + +def kernel_fusion(kernels, gammas, betas, 
means, var): + """fuse conv + BN""" + kernel_size_h, kernel_size_w = kernels[0].shape[2:] + + square = (gammas[0] / (var[0] + 1e-5) + **0.5).reshape(-1, 1, 1, 1) * kernels[0] + ver = (gammas[1] / (var[1] + 1e-5)**0.5).reshape(-1, 1, 1, 1) * kernels[1] + hor = (gammas[2] / (var[2] + 1e-5)**0.5).reshape(-1, 1, 1, 1) * kernels[2] + + b = 0 + for i in range(3): + b += -((means[i] * gammas[i]) / (var[i] + 1e-5)**0.5) + betas[i] # eq.7 + + square[:, :, :, kernel_size_w // 2:kernel_size_w // 2 + 1] += ver + square[:, :, kernel_size_h // 2:kernel_size_h // 2 + 1, :] += hor + + return square, b + + +def convert_main(model_name, input_path, output_path, class_num=1000): + model = models.__dict__[model_name]() + + main_prog = fluid.Program() + acnet_prog = fluid.Program() + startup_prog = fluid.Program() + + with fluid.program_guard(acnet_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.data( + name="image", + shape=[-1, 3, 224, 224], + dtype="float32", + lod_level=0) + model_train = models.__dict__[model_name](deploy=False) + model_train.net(image, class_dim=class_num) + + with fluid.program_guard(main_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.data( + name="image", + shape=[-1, 3, 224, 224], + dtype="float32", + lod_level=0) + model_infer = models.__dict__[model_name](deploy=True) + model_infer.net(image, class_dim=class_num) + + acnet_prog = acnet_prog.clone(for_test=True) + main_prog = main_prog.clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup_prog) + + assert os.path.exists( + input_path), "Pretrained model path {} not exist!".format(input_path) + fluid.io.load_vars(exe, input_path, + main_program=acnet_prog, + predicate=lambda var: os.path.exists(os.path.join(input_path, var.name))) + + mapping = {} + + for param in main_prog.blocks[0].all_parameters(): + if "acsquare" in param.name: + name_root = "_".join(param.name.split("_")[:-2]) + if name_root in mapping.keys(): + 
mapping[name_root].append(param.name) + else: + mapping[name_root] = [param.name] + else: + assert param.name not in mapping.keys() + mapping[param.name] = [param.name] + + for name_root, names in mapping.items(): + if len(names) == 1: + pass + else: + if "bias" in names[0]: + bias_id = 0 + kernel_id = 1 + else: + bias_id = 1 + kernel_id = 0 + + tensor_bias = fluid.global_scope().find_var(names[ + bias_id]).get_tensor() + tensor_kernel = fluid.global_scope().find_var(names[ + kernel_id]).get_tensor() + + ret = get_kernel_bn_tensors(name_root) + kernels = ret['kernels'] + gammas, betas, means, var = ret['bn'] + + kernel, bias = kernel_fusion(kernels, gammas, betas, means, var) + + logger.info("Before {}: {}".format(names[ + kernel_id], np.array(tensor_kernel).ravel()[:5])) + + tensor_bias.set(bias, place) + tensor_kernel.set(kernel, place) + + logger.info("After {}: {}\n".format(names[ + kernel_id], np.array(tensor_kernel).ravel()[:5])) + + if os.path.isdir(output_path): + shutil.rmtree(output_path) + os.makedirs(output_path) + fluid.io.save_persistables(exe, output_path, main_program=main_prog) + + +if __name__ == "__main__": + assert len( + sys.argv + ) == 5, "input format: python weights_aggregator.py $model_name $input_path $output_path $class_num" + model_name = sys.argv[1] + input_path = sys.argv[2] + output_path = sys.argv[3] + class_num = int(sys.argv[4]) + logger.info("model_name: {}".format(model_name)) + logger.info("input_path: {}".format(input_path)) + logger.info("output_path: {}".format(output_path)) + logger.info("class_num: {}".format(class_num)) + convert_main(model_name, input_path, output_path, class_num)