diff --git a/paddleslim/nas/ofa/convert_super.py b/paddleslim/nas/ofa/convert_super.py
index 12622da55d367fa1a73235d9896ecf742d75c0d7..66e327a9abd1f9c23227d537f99b5d52020d21b2 100644
--- a/paddleslim/nas/ofa/convert_super.py
+++ b/paddleslim/nas/ofa/convert_super.py
@@ -23,12 +23,14 @@ pd_ver = get_paddle_version()
 if pd_ver == 185:
     import paddle.fluid.dygraph.nn as nn
     from paddle.fluid.dygraph.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding
+    from paddle.fluid import ParamAttr
     from .layers import *
     from . import layers
     Layer = paddle.fluid.dygraph.Layer
 else:
     import paddle.nn as nn
     from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding
+    from paddle import ParamAttr
     from .layers_new import *
     from . import layers_new as layers
     Layer = paddle.nn.Layer
@@ -44,6 +46,22 @@ class Convert:
     def __init__(self, context):
         self.context = context
 
+    def _change_name(self, layer, pd_ver, has_bias=True, conv=False):
+        if conv:
+            w_attr = layer._param_attr
+        else:
+            w_attr = layer._param_attr if pd_ver == 185 else layer._weight_attr
+
+        if isinstance(w_attr, ParamAttr):
+            if w_attr != None and not isinstance(w_attr, bool):
+                w_attr.name = 'super_' + w_attr.name
+
+        if has_bias:
+            if isinstance(layer._bias_attr, ParamAttr):
+                if layer._bias_attr != None and not isinstance(layer._bias_attr,
+                                                               bool):
+                    layer._bias_attr.name = 'super_' + layer._bias_attr.name
+
     def convert(self, network):
         # search the first and last weight layer, don't change out channel of the last weight layer
         # don't change in channel of the first weight layer
@@ -88,6 +106,7 @@ class Convert:
                         'weight_attr', 'data_format', 'padding_mode'
                     ]
 
+                self._change_name(layer, pd_ver, conv=True)
                 new_attr_dict = dict.fromkeys(new_attr_name, None)
                 new_attr_dict['candidate_config'] = dict()
                 if pd_ver == 185:
@@ -104,7 +123,7 @@ class Convert:
 
                 fks = '_filter_size' if '_filter_size' in attr_dict.keys(
                 ) else '_kernel_size'
-                ks = list(attr_dict[fks]) if isinstance(
+                ks = [attr_dict[fks]] if isinstance(
                     attr_dict[fks], numbers.Integral) else attr_dict[fks]
 
                 if self.kernel_size and int(ks[0]) != 1:
@@ -214,6 +233,7 @@ class Convert:
                 else:
                     new_attr_name += ['weight_attr', 'data_format', 'name']
 
+                self._change_name(layer, pd_ver)
                 new_attr_dict = dict.fromkeys(new_attr_name, None)
                 if pd_ver == 185:
                     new_attr_dict['num_channels'] = None
@@ -237,8 +257,9 @@ class Convert:
 
                 del layer, attr_dict
 
-                layer = getattr(layers, 'SuperBatchNorm', SuperBatchNorm2D)(
-                    **new_attr_dict)
+                layer = layers.SuperBatchNorm(
+                    **new_attr_dict
+                ) if pd_ver == 185 else layers.SuperBatchNorm2D(**new_attr_dict)
                 model[idx] = layer
 
             ### assume output_size = None, filter_size != None
@@ -273,12 +294,14 @@ class Convert:
                     new_attr_dict['in_channels'] = None
                     new_attr_dict['out_channels'] = None
                     new_attr_dict['kernel_size'] = None
+
+                self._change_name(layer, pd_ver, conv=True)
                 self.kernel_size = getattr(self.context, 'kernel_size', None)
 
                 # if the kernel_size of conv transpose is 1, don't change it.
                 fks = '_filter_size' if '_filter_size' in attr_dict.keys(
                 ) else '_kernel_size'
-                ks = list(attr_dict[fks]) if isinstance(
+                ks = [attr_dict[fks]] if isinstance(
                     attr_dict[fks], numbers.Integral) else attr_dict[fks]
 
                 if self.kernel_size and int(ks[0]) != 1:
@@ -381,7 +404,7 @@ class Convert:
                 attr_dict = layer.__dict__
                 key = attr_dict['_full_name']
                 if pd_ver == 185:
-                    new_attr_name = ['param_attr', 'bias_attr', 'act', 'dtype']
+                    new_attr_name = ['act', 'dtype']
                 else:
                     new_attr_name = ['weight_attr', 'bias_attr']
                 in_nc, out_nc = layer._parameters['weight'].shape
@@ -395,10 +418,8 @@ class Convert:
                     new_attr_dict['in_features'] = None
                     new_attr_dict['out_features'] = None
 
-                in_key = '_input_dim' if '_input_dim' in attr_dict.keys(
-                ) else '_in_features'
-                out_key = '_output_dim' if '_output_dim' in attr_dict.keys(
-                ) else '_out_features'
+                in_key = '_input_dim' if pd_ver == 185 else '_in_features'
+                out_key = '_output_dim' if pd_ver == 185 else '_out_features'
                 attr_dict[in_key] = in_nc
                 attr_dict[out_key] = out_nc
                 if self.context.expand:
@@ -461,6 +482,8 @@ class Convert:
                     ]
                 else:
                     new_attr_name = ['bias_attr', 'epsilon', 'weight_attr']
+
+                self._change_name(layer, pd_ver)
                 new_attr_dict = dict.fromkeys(new_attr_name, None)
                 if pd_ver == 185:
                     new_attr_dict['num_channels'] = None
@@ -485,8 +508,10 @@ class Convert:
 
                 del layer, attr_dict
 
-                layer = getattr(layers, 'SuperInstanceNorm2D',
-                                'SuperInstanceNorm')(**new_attr_dict)
+                layer = layers.SuperInstanceNorm(
+                    **new_attr_dict
+                ) if pd_ver == 185 else layers.SuperInstanceNorm2D(
+                    **new_attr_dict)
                 model[idx] = layer
 
             elif isinstance(layer, LayerNorm) and (
@@ -505,6 +530,7 @@ class Convert:
                 else:
                     new_attr_name += ['weight_attr']
 
+                self._change_name(layer, pd_ver)
                 new_attr_dict = dict.fromkeys(new_attr_name, None)
                 new_attr_dict['normalized_shape'] = None
                 if self.context.expand:
@@ -540,6 +566,8 @@ class Convert:
                         'weight_attr', 'name'
                     ]
 
+                self._change_name(layer, pd_ver, has_bias=False)
+
                 new_attr_dict = dict.fromkeys(new_attr_name, None)
                 new_attr_dict['candidate_config'] = dict()
                 bef_size = attr_dict['_size']
diff --git a/tests/test_ofa.py b/tests/test_ofa.py
index 1937c19d252b4de295fce6f9032878ebb6b33061..462605b78e4b1604fb8adf87d2f39d0354a94d81 100644
--- a/tests/test_ofa.py
+++ b/tests/test_ofa.py
@@ -92,8 +92,16 @@ class ModelConv2(nn.Layer):
         super(ModelConv2, self).__init__()
         with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
             models = []
-            models += [nn.Conv2DTranspose(4, 4, 3)]
-            models += [nn.BatchNorm2D(4)]
+            models += [
+                nn.Conv2DTranspose(
+                    4, 4, 3, weight_attr=paddle.ParamAttr(name='conv1_w'))
+            ]
+            models += [
+                nn.BatchNorm2D(
+                    4,
+                    weight_attr=paddle.ParamAttr(name='bn1_w'),
+                    bias_attr=paddle.ParamAttr(name='bn1_b'))
+            ]
             models += [ReLU()]
             models += [nn.Conv2D(4, 4, 3)]
             models += [nn.BatchNorm2D(4)]
@@ -197,9 +205,25 @@ class ModelLinear2(nn.Layer):
         super(ModelLinear2, self).__init__()
         with supernet(expand_ratio=None) as ofa_super:
             models = []
-            models += [nn.Embedding(num_embeddings=64, embedding_dim=64)]
-            models += [nn.Linear(64, 128)]
-            models += [nn.LayerNorm(128)]
+            models += [
+                nn.Embedding(
+                    num_embeddings=64,
+                    embedding_dim=64,
+                    weight_attr=paddle.ParamAttr(name='emb'))
+            ]
+            models += [
+                nn.Linear(
+                    64,
+                    128,
+                    weight_attr=paddle.ParamAttr(name='fc1_w'),
+                    bias_attr=paddle.ParamAttr(name='fc1_b'))
+            ]
+            models += [
+                nn.LayerNorm(
+                    128,
+                    weight_attr=paddle.ParamAttr(name='ln1_w'),
+                    bias_attr=paddle.ParamAttr(name='ln1_b'))
+            ]
             models += [nn.Linear(128, 256)]
             models = ofa_super.convert(models)
         self.models = paddle.nn.Sequential(*models)
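
A minimal usage sketch of what this patch does, assuming PaddlePaddle 2.x and
this PaddleSlim branch (the `fc1_w`/`fc1_b` names are borrowed from the updated
`ModelLinear2` test; nothing here is part of the patch itself). The new
`_change_name` helper is invoked during `ofa_super.convert(...)`, so any
user-supplied `ParamAttr` name is rewritten with a `super_` prefix before the
replacement super-layer is constructed, which is what prevents the converted
supernet's parameters from colliding with the original model's names:

    # Sketch only: assumes PaddlePaddle 2.x and this branch of PaddleSlim.
    import paddle
    from paddleslim.nas.ofa.convert_super import supernet

    with supernet(expand_ratio=None) as ofa_super:
        models = [
            paddle.nn.Linear(
                64,
                128,
                weight_attr=paddle.ParamAttr(name='fc1_w'),
                bias_attr=paddle.ParamAttr(name='fc1_b'))
        ]
        # convert() calls _change_name() on each layer, so the ParamAttr
        # names above should be rewritten to 'super_fc1_w' and 'super_fc1_b'
        # before the SuperLinear replacement is built from them.
        models = ofa_super.convert(models)

    net = paddle.nn.Sequential(*models)
    for param in net.parameters():
        print(param.name)  # expected to carry the 'super_' prefix

Without the rename, building the super-layer from the same `ParamAttr` objects
would try to create a second parameter under an already-registered name, which
is the name conflict the `_change_name` calls above are guarding against.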