Unverified · Commit 4d69eeaa, authored by Jiabin Yang, committed by GitHub

Fix sublayer (#31824)

* fix sublayer error with include_sublayers=False

* add ut

* refactor include_sublayers related api

* fix ut

* fix ut of transformer

* fix ut of transformer

* remove useless code

* change sublayer api

* polish code

* add test for include_self=True
Parent: 76cb83e8
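From a user's point of view, the core of this commit is an API change on paddle.nn.Layer: the include_sublayers argument of sublayers()/named_sublayers() is replaced by include_self, and listing only direct children is now done via children(). The sketch below is illustrative only; the net container is hypothetical and any paddle.nn.Layer behaves the same way.

import paddle

# Hypothetical two-level container used only to illustrate the API change.
net = paddle.nn.Sequential(
    paddle.nn.Linear(4, 4),
    paddle.nn.Sequential(paddle.nn.Linear(4, 2), paddle.nn.ReLU()),
)

# Before this commit:
#   net.sublayers(include_sublayers=False)   # direct children only
#   net.sublayers(include_sublayers=True)    # recursive (default)
# After this commit:
direct_children = list(net.children())        # 2 layers: Linear, inner Sequential
all_sublayers = net.sublayers()               # 4 layers, recursive, without net itself
with_self = net.sublayers(include_self=True)  # 5 entries, net itself yielded first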
@@ -516,9 +516,6 @@ class Layer(core.Layer):
     def parameters(self, include_sublayers=True):
         """Returns a list of all Parameters from current layer and its sub-layers.
 
-        Parameters:
-            include_sublayers(bool, optional): Whether include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True
-
         Returns:
             list of Tensor : a list of Parameters.
@@ -588,11 +585,11 @@ class Layer(core.Layer):
             memo.add(layer)
             yield name, layer
 
-    def sublayers(self, include_sublayers=True):
+    def sublayers(self, include_self=False):
         """Returns a list of sub layers.
 
         Parameters:
-            include_sublayers(bool, optional): Whether return the sublayers of sublayers. If True, also include the sublayers of sublayers. Default: True
+            include_self(bool, optional): Whether return self as sublayers. Default: False
 
         Returns:
             list of Layer : a list of sub layers.
@@ -619,8 +616,7 @@ class Layer(core.Layer):
         """
         ret = [
             layer
-            for _, layer in self.named_sublayers(
-                include_sublayers=include_sublayers)
+            for _, layer in self.named_sublayers(include_self=include_self)
         ]
         return ret
@@ -651,8 +647,7 @@ class Layer(core.Layer):
         params_set = set()
         named_sublayers = self.named_sublayers(
             prefix=prefix,
-            include_sublayers=include_sublayers,
-            include_self=True)
+            include_self=True) if include_sublayers else zip([prefix], [self])
         for layer_prefix, sublayer in named_sublayers:
             params = sublayer._parameters.items()
             for key, param in params:
@@ -662,18 +657,13 @@ class Layer(core.Layer):
                 name = layer_prefix + ('.' if layer_prefix else '') + key
                 yield name, param
 
-    def named_sublayers(self,
-                        prefix='',
-                        include_sublayers=True,
-                        include_self=False,
-                        layers_set=None):
+    def named_sublayers(self, prefix='', include_self=False, layers_set=None):
         """
         Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
         The duplicate sublayer will only be yielded once.
 
         Parameters:
             prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
-            include_sublayers(bool, optional): Whether include the sublayers. Default: True.
             include_self(bool, optional): Whether include the Layer itself. Default: False.
             layers_set(set, optioanl): The set to record duplicate sublayers. Default: None.
@@ -697,15 +687,12 @@ class Layer(core.Layer):
         if include_self and self not in layers_set:
             layers_set.add(self)
             yield prefix, self
-        if include_sublayers:
-            for key, layer in self._sub_layers.items():
-                if layer is None:
-                    continue
-                layer_prefix = prefix + ('.' if prefix else '') + key
-                for p, l in layer.named_sublayers(
-                        prefix=layer_prefix,
-                        include_sublayers=include_sublayers,
-                        include_self=True,
-                        layers_set=layers_set):
-                    yield p, l
+        for key, layer in self._sub_layers.items():
+            if layer is None:
+                continue
+            layer_prefix = prefix + ('.' if prefix else '') + key
+            for p, l in layer.named_sublayers(
+                    prefix=layer_prefix, include_self=True,
+                    layers_set=layers_set):
+                yield p, l
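To make the refactored recursion above concrete: named_sublayers() now always walks the whole tree, joining names with '.', and include_self=True yields the layer itself first under the given prefix (empty by default). A small sketch with a hypothetical named container; the names 'block' and 'fc' are made up for illustration.

import paddle

# Hypothetical nested container built from name-layer pairs.
net = paddle.nn.Sequential(
    ('block', paddle.nn.Sequential(('fc', paddle.nn.Linear(4, 2)))),
)

# Recursive traversal with dotted, prefix-joined names.
print([name for name, _ in net.named_sublayers()])
# ['block', 'block.fc']

# include_self=True additionally yields ('', net) as the first item.
print([name for name, _ in net.named_sublayers(include_self=True)])
# ['', 'block', 'block.fc']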
@@ -844,8 +831,7 @@ class Layer(core.Layer):
         buffers_set = set()
         named_sublayers = self.named_sublayers(
             prefix=prefix,
-            include_sublayers=include_sublayers,
-            include_self=True)
+            include_self=True) if include_sublayers else zip([prefix], [self])
         for layer_prefix, sublayer in named_sublayers:
             buffers = sublayer._buffers.items()
             for key, buffer in buffers:
@@ -1263,16 +1249,12 @@ class Layer(core.Layer):
         return destination
 
     @framework.deprecate_stat_dict
-    def set_state_dict(self,
-                       state_dict,
-                       include_sublayers=True,
-                       use_structured_name=True):
+    def set_state_dict(self, state_dict, use_structured_name=True):
         '''
         Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict
 
         Parameters:
             state_dict(dict) : Dict contains all the parameters and persistable buffers.
-            include_sublayers(bool, optional) : If true, also include the parameters and peresistable buffers from sublayers. Default: True
             use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                   Default: True
         Returns:
...
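With the include_sublayers flag gone, set_state_dict() always restores the parameters and persistable buffers of the whole layer tree; only use_structured_name remains. A minimal sketch of saving and restoring under the new signature (the model layer here is hypothetical):

import paddle

model = paddle.nn.Linear(4, 2)   # hypothetical layer; any paddle.nn.Layer works
state = model.state_dict()       # collects parameters and persistable buffers

# Before this commit:
#   model.set_state_dict(state, include_sublayers=True, use_structured_name=True)
# After this commit:
model.set_state_dict(state, use_structured_name=True)
# set_dict is kept as a compatibility alias for set_state_dict.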
@@ -621,16 +621,12 @@ class DataParallel(layers.Layer):
             structured_name_prefix=structured_name_prefix)
 
     @framework.deprecate_stat_dict
-    def set_state_dict(self,
-                       state_dict,
-                       include_sublayers=True,
-                       use_structured_name=True):
+    def set_state_dict(self, state_dict, use_structured_name=True):
         '''
         Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict
 
         Parameters:
             state_dict(dict) : Dict contains all the parameters and persistable buffers.
-            include_sublayers(bool, optional) : If true, also include the parameters and peresistable buffers from sublayers. Default: True
             use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                   Default: True
         Returns:
@@ -656,9 +652,7 @@ class DataParallel(layers.Layer):
         '''
         self._layers.set_state_dict(
-            state_dict,
-            include_sublayers=include_sublayers,
-            use_structured_name=use_structured_name)
+            state_dict, use_structured_name=use_structured_name)
 
     # [aliases] Compatible with old method names
     set_dict = set_state_dict
...
@@ -54,7 +54,7 @@ class PrePostProcessLayer(Layer):
                 self.functors.append(
                     self.add_sublayer(
                         "layer_norm_%d" % len(
-                            self.sublayers(include_sublayers=False)),
+                            [layer for layer in self.children()]),
                         LayerNorm(
                             normalized_shape=d_model,
                             param_attr=fluid.ParamAttr(
...
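In the transformer test above, self.sublayers(include_sublayers=False) is replaced by a list built from self.children(); both count only the direct sub layers, so the generated "layer_norm_%d" names stay the same. A hedged sketch of that equivalence with a made-up layer:

import paddle

class Block(paddle.nn.Layer):
    # Hypothetical layer with one direct Linear and one nested container.
    def __init__(self):
        super(Block, self).__init__()
        self._fc = paddle.nn.Linear(4, 4)
        self._inner = paddle.nn.Sequential(paddle.nn.Linear(4, 4))

    def forward(self, x):
        return self._inner(self._fc(x))

block = Block()
n_direct = len([layer for layer in block.children()])  # 2: _fc and _inner
n_all = len(block.sublayers())                          # 3: also the Linear inside _inner
assert (n_direct, n_all) == (2, 3)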
@@ -472,7 +472,7 @@ class TestImperative(unittest.TestCase):
         self.assertEqual("linear_1.b_0", params[3].name)
         self.assertEqual(len(params), 4)
 
-        sublayers = mlp.sublayers(True)
+        sublayers = mlp.sublayers()
         self.assertEqual(mlp._linear1, sublayers[0])
         self.assertEqual(mlp._linear2, sublayers[1])
         self.assertEqual(len(sublayers), 2)
...
@@ -52,10 +52,6 @@ class TestImperativeNamedSubLayers(unittest.TestCase):
                                             list_sublayers):
             self.assertEqual(sublayer, expected_sublayer)
 
-        for name, sublayer in model.named_sublayers(
-                include_sublayers=False):
-            self.assertEqual(model[name], sublayer)
-
         self.assertListEqual(
             [l for _, l in list(model.named_sublayers(include_self=True))],
             [model] + expected_sublayers)
@@ -71,7 +67,7 @@ class TestImperativeNamedParameters(unittest.TestCase):
         named_parameters = list(model.named_parameters())
         expected_named_parameters = list()
-        for prefix, layer in model.named_sublayers(include_sublayers=True):
+        for prefix, layer in model.named_sublayers():
             for name, param in layer.named_parameters(
                     include_sublayers=False):
                 full_name = prefix + ('.' if prefix else '') + name
...
@@ -3718,6 +3718,36 @@ class TestLayerTrainingAttribute(unittest.TestCase):
         self.assertFalse(net.training)
 
 
+class MyLayer(paddle.nn.Layer):
+    def __init__(self):
+        super(MyLayer, self).__init__()
+        self._linear = paddle.nn.Linear(1, 1)
+        self._dropout = paddle.nn.Dropout(p=0.5)
+
+    def forward(self, input):
+        temp = self._linear(input)
+        temp = self._dropout(temp)
+        return temp
+
+
+class MySuperLayer(paddle.nn.Layer):
+    def __init__(self):
+        super(MySuperLayer, self).__init__()
+        self._mylayer = MyLayer()
+
+    def forward(self, input):
+        temp = self._mylayer(input)
+        return temp
+
+
+class TestSubLayerCount(unittest.TestCase):
+    def test_sublayer(self):
+        with fluid.dygraph.guard():
+            mySuperlayer = MySuperLayer()
+            self.assertTrue(len(mySuperlayer.sublayers()) == 3)
+            self.assertTrue(
+                len(mySuperlayer.sublayers(include_self=True)) == 4)
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()