From 2e8ad253e8949baaf9a3fbf5c3b2ba8bc33e85a5 Mon Sep 17 00:00:00 2001 From: Chang Xu Date: Mon, 19 Apr 2021 16:04:28 +0800 Subject: [PATCH] add_sync (#702) * add_syncbn --- paddleslim/nas/ofa/convert_super.py | 58 ++++++++++++++++++++++++++--- paddleslim/nas/ofa/layers.py | 41 +++++++++++++++++++- tests/test_ofa.py | 1 + tests/test_ofa_layers.py | 1 + 4 files changed, 94 insertions(+), 7 deletions(-) diff --git a/paddleslim/nas/ofa/convert_super.py b/paddleslim/nas/ofa/convert_super.py index 7b879449..41ce4980 100644 --- a/paddleslim/nas/ofa/convert_super.py +++ b/paddleslim/nas/ofa/convert_super.py @@ -29,7 +29,7 @@ if pd_ver == 185: Layer = paddle.fluid.dygraph.Layer else: import paddle.nn as nn - from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding + from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding, SyncBatchNorm from paddle import ParamAttr from .layers import * from . import layers @@ -287,6 +287,44 @@ class Convert: ) if pd_ver == 185 else layers.SuperBatchNorm2D(**new_attr_dict) model[idx] = layer + elif isinstance(layer, SyncBatchNorm) and ( + getattr(self.context, 'expand', None) != None or + getattr(self.context, 'channel', None) != None): + # num_features in SyncBatchNorm don't change after last weight operators + if idx > last_weight_layer_idx: + continue + + attr_dict = layer.__dict__ + new_attr_name = ['momentum', 'epsilon', 'bias_attr'] + + new_attr_name += ['weight_attr', 'data_format', 'name'] + + self._change_name(layer, pd_ver) + new_attr_dict = dict.fromkeys(new_attr_name, None) + new_attr_dict['num_features'] = None + + new_key = 'num_channels' if 'num_channels' in new_attr_dict.keys( + ) else 'num_features' + + if self.context.expand: + new_attr_dict[new_key] = int( + self.context.expand * + layer._parameters['weight'].shape[0]) + elif self.context.channel: + new_attr_dict[new_key] = max(cur_channel) + else: + new_attr_dict[new_key] = attr_dict[ + '_num_channels'] if '_num_channels' in attr_dict.keys( + ) else attr_dict['_num_features'] + + for attr in new_attr_name: + new_attr_dict[attr] = attr_dict['_' + attr] + + del layer, attr_dict + + layer = layers.SuperSyncBatchNorm(**new_attr_dict) + model[idx] = layer + ### assume output_size = None, filter_size != None ### NOTE: output_size != None may raise error, solve when it happend. elif isinstance(layer, Conv2DTranspose): @@ -651,13 +689,23 @@ class Convert: raise NotImplementedError("name error") return net + def get_split_names(layer, name_list): + if name_list: + self.name_list.append(name_list) + for _, (name, sublayer) in enumerate(layer.named_children()): + if sublayer.named_children(): + get_split_names(sublayer, name_list + [name]) + if isinstance(network, Layer): - for idx, (name, sublayer) in enumerate(network.named_sublayers()): - if len(name.split('.')) > 1: - net = split_prefix(network, name.split('.')[:-1]) + curr_id = 0 + self.name_list = [] + get_split_names(network, []) + for idx, nl in enumerate(self.name_list): + if len(nl) > 1: + net = split_prefix(network, nl[:-1]) else: net = network - setattr(net, name.split('.')[-1], model[idx]) + setattr(net, nl[-1], model[idx]) return network diff --git a/paddleslim/nas/ofa/layers.py b/paddleslim/nas/ofa/layers.py index 18214213..0f69647d 100644 --- a/paddleslim/nas/ofa/layers.py +++ b/paddleslim/nas/ofa/layers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,8 @@ __all__ = [ 'SuperConv2D', 'SuperConv2DTranspose', 'SuperSeparableConv2D', 'SuperBatchNorm2D', 'SuperLinear', 'SuperInstanceNorm2D', 'SuperGroupConv2D', 'SuperDepthwiseConv2D', 'SuperGroupConv2DTranspose', - 'SuperDepthwiseConv2DTranspose', 'SuperLayerNorm', 'SuperEmbedding' + 'SuperDepthwiseConv2DTranspose', 'SuperLayerNorm', 'SuperEmbedding', + 'SuperSyncBatchNorm' ] _logger = get_logger(__name__, level=logging.INFO) @@ -956,6 +957,42 @@ class SuperBatchNorm2D(nn.BatchNorm2D): data_format=self._data_format) +class SuperSyncBatchNorm(nn.SyncBatchNorm): + def __init__(self, + num_features, + momentum=0.9, + epsilon=1e-05, + weight_attr=None, + bias_attr=None, + data_format='NCHW', + name=None): + super(SuperSyncBatchNorm, + self).__init__(num_features, momentum, epsilon, weight_attr, + bias_attr, data_format, name) + + def forward(self, input): + + feature_dim = int(input.shape[1]) + + weight = self.weight[:feature_dim] + bias = self.bias[:feature_dim] + mean = self._mean[:feature_dim] + variance = self._variance[:feature_dim] + + mean_out = mean + # variance and variance out share the same memory + variance_out = variance + + attrs = ("momentum", self._momentum, "epsilon", self._epsilon, + "is_test", not self.training, "data_layout", self._data_format, + "use_mkldnn", False, "fuse_with_relu", False, + "use_global_stats", False, 'trainable_statistics', False) + sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm( + input, weight, bias, mean, variance, mean_out, variance_out, *attrs) + + return sync_batch_norm_out + + class SuperInstanceNorm2D(nn.InstanceNorm2D): """ This interface is used to construct a callable object of the ``SuperBatchNorm2D`` class. diff --git a/tests/test_ofa.py b/tests/test_ofa.py index 4d77ad1e..8a314ab3 100644 --- a/tests/test_ofa.py +++ b/tests/test_ofa.py @@ -37,6 +37,7 @@ class ModelConv(nn.Layer): models = [] models += [nn.Conv2D(3, 4, 3, padding=1)] models += [nn.InstanceNorm2D(4)] + models += [nn.SyncBatchNorm(4)] models += [ReLU()] models += [nn.Conv2D(4, 4, 3, groups=4)] models += [nn.InstanceNorm2D(4)] diff --git a/tests/test_ofa_layers.py b/tests/test_ofa_layers.py index 78676be5..41137544 100644 --- a/tests/test_ofa_layers.py +++ b/tests/test_ofa_layers.py @@ -30,6 +30,7 @@ class ModelCase1(nn.Layer): super(ModelCase1, self).__init__() models = [SuperConv2D(3, 4, 3, bias_attr=False)] models += [SuperConv2D(4, 4, 3, groups=4)] + models += [SuperSyncBatchNorm(4)] models += [SuperConv2D(4, 4, 3, groups=2)] models += [SuperConv2DTranspose(4, 4, 3, bias_attr=False)] models += [SuperConv2DTranspose(4, 4, 3, groups=4)] -- GitLab