From 4d69eeaadbdf67d4ed3a6318f0207b0ecc21c8c6 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Mon, 19 Apr 2021 12:10:16 +0800 Subject: [PATCH] Fix sublayer (#31824) * fix sublayer error with include_sublayers=False * add ut * refactor include_sublayers related api * fix ut * fix ut of transformer * fix ut of transformer * remove useless code * change sublayer api * polish code * add test for include_self=True --- python/paddle/fluid/dygraph/layers.py | 48 ++++++------------- python/paddle/fluid/dygraph/parallel.py | 10 +--- .../transformer_dygraph_model.py | 2 +- .../tests/unittests/test_imperative_basic.py | 2 +- .../test_imperative_named_members.py | 6 +-- .../fluid/tests/unittests/test_layers.py | 30 ++++++++++++ 6 files changed, 50 insertions(+), 48 deletions(-) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index b495976474..18dfff434a 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -516,9 +516,6 @@ class Layer(core.Layer): def parameters(self, include_sublayers=True): """Returns a list of all Parameters from current layer and its sub-layers. - Parameters: - include_sublayers(bool, optional): Whether include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True - Returns: list of Tensor : a list of Parameters. @@ -588,11 +585,11 @@ class Layer(core.Layer): memo.add(layer) yield name, layer - def sublayers(self, include_sublayers=True): + def sublayers(self, include_self=False): """Returns a list of sub layers. Parameters: - include_sublayers(bool, optional): Whether return the sublayers of sublayers. If True, also include the sublayers of sublayers. Default: True + include_self(bool, optional): Whether return self as sublayers. Default: False Returns: list of Layer : a list of sub layers. @@ -619,8 +616,7 @@ class Layer(core.Layer): """ ret = [ layer - for _, layer in self.named_sublayers( - include_sublayers=include_sublayers) + for _, layer in self.named_sublayers(include_self=include_self) ] return ret @@ -651,8 +647,7 @@ class Layer(core.Layer): params_set = set() named_sublayers = self.named_sublayers( prefix=prefix, - include_sublayers=include_sublayers, - include_self=True) + include_self=True) if include_sublayers else zip([prefix], [self]) for layer_prefix, sublayer in named_sublayers: params = sublayer._parameters.items() for key, param in params: @@ -662,18 +657,13 @@ class Layer(core.Layer): name = layer_prefix + ('.' if layer_prefix else '') + key yield name, param - def named_sublayers(self, - prefix='', - include_sublayers=True, - include_self=False, - layers_set=None): + def named_sublayers(self, prefix='', include_self=False, layers_set=None): """ Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer. The duplicate sublayer will only be yielded once. Parameters: prefix(str, optional): Prefix to prepend to all parameter names. Default: ''. - include_sublayers(bool, optional): Whether include the sublayers. Default: True. include_self(bool, optional): Whether include the Layer itself. Default: False. layers_set(set, optioanl): The set to record duplicate sublayers. Default: None. @@ -697,17 +687,14 @@ class Layer(core.Layer): if include_self and self not in layers_set: layers_set.add(self) yield prefix, self - if include_sublayers: - for key, layer in self._sub_layers.items(): - if layer is None: - continue - layer_prefix = prefix + ('.' if prefix else '') + key - for p, l in layer.named_sublayers( - prefix=layer_prefix, - include_sublayers=include_sublayers, - include_self=True, - layers_set=layers_set): - yield p, l + for key, layer in self._sub_layers.items(): + if layer is None: + continue + layer_prefix = prefix + ('.' if prefix else '') + key + for p, l in layer.named_sublayers( + prefix=layer_prefix, include_self=True, + layers_set=layers_set): + yield p, l def register_buffer(self, name, tensor, persistable=True): """ @@ -844,8 +831,7 @@ class Layer(core.Layer): buffers_set = set() named_sublayers = self.named_sublayers( prefix=prefix, - include_sublayers=include_sublayers, - include_self=True) + include_self=True) if include_sublayers else zip([prefix], [self]) for layer_prefix, sublayer in named_sublayers: buffers = sublayer._buffers.items() for key, buffer in buffers: @@ -1263,16 +1249,12 @@ class Layer(core.Layer): return destination @framework.deprecate_stat_dict - def set_state_dict(self, - state_dict, - include_sublayers=True, - use_structured_name=True): + def set_state_dict(self, state_dict, use_structured_name=True): ''' Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters and persistable buffers. - include_sublayers(bool, optional) : If true, also include the parameters and peresistable buffers from sublayers. Default: True use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. Default: True Returns: diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index b80621e21f..b73592eac3 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -621,16 +621,12 @@ class DataParallel(layers.Layer): structured_name_prefix=structured_name_prefix) @framework.deprecate_stat_dict - def set_state_dict(self, - state_dict, - include_sublayers=True, - use_structured_name=True): + def set_state_dict(self, state_dict, use_structured_name=True): ''' Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters and persistable buffers. - include_sublayers(bool, optional) : If true, also include the parameters and peresistable buffers from sublayers. Default: True use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. Default: True Returns: @@ -656,9 +652,7 @@ class DataParallel(layers.Layer): ''' self._layers.set_state_dict( - state_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) + state_dict, use_structured_name=use_structured_name) # [aliases] Compatible with old method names set_dict = set_state_dict diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 1fee1c1ef6..07e9b1ac62 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -54,7 +54,7 @@ class PrePostProcessLayer(Layer): self.functors.append( self.add_sublayer( "layer_norm_%d" % len( - self.sublayers(include_sublayers=False)), + [layer for layer in self.children()]), LayerNorm( normalized_shape=d_model, param_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index cb48013902..e6e7b8222a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -472,7 +472,7 @@ class TestImperative(unittest.TestCase): self.assertEqual("linear_1.b_0", params[3].name) self.assertEqual(len(params), 4) - sublayers = mlp.sublayers(True) + sublayers = mlp.sublayers() self.assertEqual(mlp._linear1, sublayers[0]) self.assertEqual(mlp._linear2, sublayers[1]) self.assertEqual(len(sublayers), 2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py index 721453c512..dfcd6392b4 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py @@ -52,10 +52,6 @@ class TestImperativeNamedSubLayers(unittest.TestCase): list_sublayers): self.assertEqual(sublayer, expected_sublayer) - for name, sublayer in model.named_sublayers( - include_sublayers=False): - self.assertEqual(model[name], sublayer) - self.assertListEqual( [l for _, l in list(model.named_sublayers(include_self=True))], [model] + expected_sublayers) @@ -71,7 +67,7 @@ class TestImperativeNamedParameters(unittest.TestCase): named_parameters = list(model.named_parameters()) expected_named_parameters = list() - for prefix, layer in model.named_sublayers(include_sublayers=True): + for prefix, layer in model.named_sublayers(): for name, param in layer.named_parameters( include_sublayers=False): full_name = prefix + ('.' if prefix else '') + name diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 35ecbd6bf1..5da4a1889b 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3718,6 +3718,36 @@ class TestLayerTrainingAttribute(unittest.TestCase): self.assertFalse(net.training) +class MyLayer(paddle.nn.Layer): + def __init__(self): + super(MyLayer, self).__init__() + self._linear = paddle.nn.Linear(1, 1) + self._dropout = paddle.nn.Dropout(p=0.5) + + def forward(self, input): + temp = self._linear(input) + temp = self._dropout(temp) + return temp + + +class MySuperLayer(paddle.nn.Layer): + def __init__(self): + super(MySuperLayer, self).__init__() + self._mylayer = MyLayer() + + def forward(self, input): + temp = self._mylayer(input) + return temp + + +class TestSubLayerCount(unittest.TestCase): + def test_sublayer(self): + with fluid.dygraph.guard(): + mySuperlayer = MySuperLayer() + self.assertTrue(len(mySuperlayer.sublayers()) == 3) + self.assertTrue(len(mySuperlayer.sublayers(include_self=True)) == 4) + + if __name__ == '__main__': paddle.enable_static() unittest.main() -- GitLab