Unverified · Commit 2e8ad253, authored by Chang Xu, committed by GitHub

add_sync (#702)

* add_syncbn
Parent e7a02b5c
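Below is a minimal sketch of how this change is exercised through PaddleSlim's OFA conversion entry points (Convert / supernet); the toy network and the expand_ratio values are illustrative assumptions, not part of this commit.

import paddle.nn as nn
from paddleslim.nas.ofa.convert_super import Convert, supernet

class Net(nn.Layer):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Conv2D(3, 8, 3, padding=1)
        self.bn = nn.SyncBatchNorm(8)   # previously not handled by Convert

    def forward(self, x):
        return self.bn(self.conv(x))

sp_config = supernet(expand_ratio=[1, 2, 4])
sp_model = Convert(sp_config).convert(Net())
# nn.SyncBatchNorm(8) is replaced with SuperSyncBatchNorm sized for the
# largest expansion (8 * 4 channels here), so narrower sub-networks can
# slice its parameters at run time.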
......@@ -29,7 +29,7 @@ if pd_ver == 185:
Layer = paddle.fluid.dygraph.Layer
else:
import paddle.nn as nn
from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding
from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding, SyncBatchNorm
from paddle import ParamAttr
from .layers import *
from . import layers
......@@ -287,6 +287,44 @@ class Convert:
) if pd_ver == 185 else layers.SuperBatchNorm2D(**new_attr_dict)
model[idx] = layer
elif isinstance(layer, SyncBatchNorm) and (
getattr(self.context, 'expand', None) != None or
getattr(self.context, 'channel', None) != None):
# num_features in SyncBatchNorm does not change after the last weight layer
if idx > last_weight_layer_idx:
continue
attr_dict = layer.__dict__
new_attr_name = ['momentum', 'epsilon', 'bias_attr']
new_attr_name += ['weight_attr', 'data_format', 'name']
self._change_name(layer, pd_ver)
new_attr_dict = dict.fromkeys(new_attr_name, None)
new_attr_dict['num_features'] = None
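# pick the constructor argument name expected by the target layer:
# 'num_channels' for the paddle 1.8.x layers, 'num_features' for paddle 2.x
# (SyncBatchNorm always takes the 'num_features' branch here)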
new_key = 'num_channels' if 'num_channels' in new_attr_dict.keys(
) else 'num_features'
if self.context.expand:
new_attr_dict[new_key] = int(
self.context.expand *
layer._parameters['weight'].shape[0])
elif self.context.channel:
new_attr_dict[new_key] = max(cur_channel)
else:
new_attr_dict[new_key] = attr_dict[
'_num_channels'] if '_num_channels' in attr_dict.keys(
) else attr_dict['_num_features']
for attr in new_attr_name:
new_attr_dict[attr] = attr_dict['_' + attr]
del layer, attr_dict
layer = layers.SuperSyncBatchNorm(**new_attr_dict)
model[idx] = layer
### assume output_size = None, filter_size != None
### NOTE: output_size != None may raise an error; handle it when that happens.
elif isinstance(layer, Conv2DTranspose):
......@@ -651,13 +689,23 @@ class Convert:
raise NotImplementedError("name error")
return net
def get_split_names(layer, name_list):
if name_list:
self.name_list.append(name_list)
for _, (name, sublayer) in enumerate(layer.named_children()):
if sublayer.named_children():
get_split_names(sublayer, name_list + [name])
if isinstance(network, Layer):
for idx, (name, sublayer) in enumerate(network.named_sublayers()):
if len(name.split('.')) > 1:
net = split_prefix(network, name.split('.')[:-1])
curr_id = 0
self.name_list = []
get_split_names(network, [])
for idx, nl in enumerate(self.name_list):
if len(nl) > 1:
net = split_prefix(network, nl[:-1])
else:
net = network
setattr(net, name.split('.')[-1], model[idx])
setattr(net, nl[-1], model[idx])
return network
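For intuition, a standalone sketch of the traversal that builds self.name_list (the toy layers below are hypothetical, not part of this commit):

import paddle.nn as nn

class Block(nn.Layer):
    def __init__(self):
        super(Block, self).__init__()
        self.bn = nn.BatchNorm2D(4)

class Toy(nn.Layer):
    def __init__(self):
        super(Toy, self).__init__()
        self.conv1 = nn.Conv2D(3, 4, 3)
        self.block = Block()

name_list = []

def collect(layer, names):
    # mirrors get_split_names: record the name path of every sublayer,
    # then descend into its children
    if names:
        name_list.append(names)
    for name, sublayer in layer.named_children():
        collect(sublayer, names + [name])

collect(Toy(), [])
print(name_list)  # [['conv1'], ['block'], ['block', 'bn']]
# the paths line up with network.named_sublayers(), so name_list[idx]
# addresses the layer to be replaced by model[idx]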
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -29,7 +29,8 @@ __all__ = [
'SuperConv2D', 'SuperConv2DTranspose', 'SuperSeparableConv2D',
'SuperBatchNorm2D', 'SuperLinear', 'SuperInstanceNorm2D',
'SuperGroupConv2D', 'SuperDepthwiseConv2D', 'SuperGroupConv2DTranspose',
'SuperDepthwiseConv2DTranspose', 'SuperLayerNorm', 'SuperEmbedding'
'SuperDepthwiseConv2DTranspose', 'SuperLayerNorm', 'SuperEmbedding',
'SuperSyncBatchNorm'
]
_logger = get_logger(__name__, level=logging.INFO)
......@@ -956,6 +957,42 @@ class SuperBatchNorm2D(nn.BatchNorm2D):
data_format=self._data_format)
class SuperSyncBatchNorm(nn.SyncBatchNorm):
def __init__(self,
num_features,
momentum=0.9,
epsilon=1e-05,
weight_attr=None,
bias_attr=None,
data_format='NCHW',
name=None):
super(SuperSyncBatchNorm,
self).__init__(num_features, momentum, epsilon, weight_attr,
bias_attr, data_format, name)
def forward(self, input):
feature_dim = int(input.shape[1])
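# slice the stored parameters and running statistics down to the channel
# count of the current input, i.e. the width of the active sub-network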
weight = self.weight[:feature_dim]
bias = self.bias[:feature_dim]
mean = self._mean[:feature_dim]
variance = self._variance[:feature_dim]
mean_out = mean
# variance and variance out share the same memory
variance_out = variance
attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
"is_test", not self.training, "data_layout", self._data_format,
"use_mkldnn", False, "fuse_with_relu", False,
"use_global_stats", False, 'trainable_statistics', False)
sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm(
input, weight, bias, mean, variance, mean_out, variance_out, *attrs)
return sync_batch_norm_out
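A minimal usage sketch of the new layer (assumes a GPU build of paddle, since the sync_batch_norm kernel runs on GPU; the import path and shapes are illustrative assumptions):

import paddle
from paddleslim.nas.ofa.layers import SuperSyncBatchNorm

bn = SuperSyncBatchNorm(8)           # parameters are created for 8 channels
x = paddle.rand([2, 4, 16, 16])      # the active sub-network feeds only 4 channels
y = bn(x)                            # weight/bias/mean/variance are sliced to [:4]
print(y.shape)                       # [2, 4, 16, 16]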
class SuperInstanceNorm2D(nn.InstanceNorm2D):
"""
This interface is used to construct a callable object of the ``SuperInstanceNorm2D`` class.
......
......@@ -37,6 +37,7 @@ class ModelConv(nn.Layer):
models = []
models += [nn.Conv2D(3, 4, 3, padding=1)]
models += [nn.InstanceNorm2D(4)]
models += [nn.SyncBatchNorm(4)]
models += [ReLU()]
models += [nn.Conv2D(4, 4, 3, groups=4)]
models += [nn.InstanceNorm2D(4)]
......
......@@ -30,6 +30,7 @@ class ModelCase1(nn.Layer):
super(ModelCase1, self).__init__()
models = [SuperConv2D(3, 4, 3, bias_attr=False)]
models += [SuperConv2D(4, 4, 3, groups=4)]
models += [SuperSyncBatchNorm(4)]
models += [SuperConv2D(4, 4, 3, groups=2)]
models += [SuperConv2DTranspose(4, 4, 3, bias_attr=False)]
models += [SuperConv2DTranspose(4, 4, 3, groups=4)]
......