未验证 提交 4d69eeaa 编写于 作者: J Jiabin Yang 提交者: GitHub

Fix sublayer (#31824)

* fix sublayer error with include_sublayers=False

* add ut

* refactor include_sublayers related api

* fix ut

* fix ut of transformer

* fix ut of transformer

* remove useless code

* change sublayer api

* polish code

* add test for include_self=True
上级 76cb83e8
......@@ -516,9 +516,6 @@ class Layer(core.Layer):
def parameters(self, include_sublayers=True):
"""Returns a list of all Parameters from current layer and its sub-layers.
Parameters:
include_sublayers(bool, optional): Whether include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True
Returns:
list of Tensor : a list of Parameters.
......@@ -588,11 +585,11 @@ class Layer(core.Layer):
memo.add(layer)
yield name, layer
def sublayers(self, include_sublayers=True):
def sublayers(self, include_self=False):
"""Returns a list of sub layers.
Parameters:
include_sublayers(bool, optional): Whether return the sublayers of sublayers. If True, also include the sublayers of sublayers. Default: True
include_self(bool, optional): Whether to return self as one of the sublayers. Default: False
Returns:
list of Layer : a list of sub layers.
......@@ -619,8 +616,7 @@ class Layer(core.Layer):
"""
ret = [
layer
for _, layer in self.named_sublayers(
include_sublayers=include_sublayers)
for _, layer in self.named_sublayers(include_self=include_self)
]
return ret
......@@ -651,8 +647,7 @@ class Layer(core.Layer):
params_set = set()
named_sublayers = self.named_sublayers(
prefix=prefix,
include_sublayers=include_sublayers,
include_self=True)
include_self=True) if include_sublayers else zip([prefix], [self])
for layer_prefix, sublayer in named_sublayers:
params = sublayer._parameters.items()
for key, param in params:
......@@ -662,18 +657,13 @@ class Layer(core.Layer):
name = layer_prefix + ('.' if layer_prefix else '') + key
yield name, param
def named_sublayers(self,
prefix='',
include_sublayers=True,
include_self=False,
layers_set=None):
def named_sublayers(self, prefix='', include_self=False, layers_set=None):
"""
Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
The duplicate sublayer will only be yielded once.
Parameters:
prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
include_sublayers(bool, optional): Whether include the sublayers. Default: True.
include_self(bool, optional): Whether include the Layer itself. Default: False.
layers_set(set, optional): The set used to record duplicate sublayers. Default: None.
......@@ -697,17 +687,14 @@ class Layer(core.Layer):
if include_self and self not in layers_set:
layers_set.add(self)
yield prefix, self
if include_sublayers:
for key, layer in self._sub_layers.items():
if layer is None:
continue
layer_prefix = prefix + ('.' if prefix else '') + key
for p, l in layer.named_sublayers(
prefix=layer_prefix,
include_sublayers=include_sublayers,
include_self=True,
layers_set=layers_set):
yield p, l
for key, layer in self._sub_layers.items():
if layer is None:
continue
layer_prefix = prefix + ('.' if prefix else '') + key
for p, l in layer.named_sublayers(
prefix=layer_prefix, include_self=True,
layers_set=layers_set):
yield p, l
def register_buffer(self, name, tensor, persistable=True):
"""
......@@ -844,8 +831,7 @@ class Layer(core.Layer):
buffers_set = set()
named_sublayers = self.named_sublayers(
prefix=prefix,
include_sublayers=include_sublayers,
include_self=True)
include_self=True) if include_sublayers else zip([prefix], [self])
for layer_prefix, sublayer in named_sublayers:
buffers = sublayer._buffers.items()
for key, buffer in buffers:
......@@ -1263,16 +1249,12 @@ class Layer(core.Layer):
return destination
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
def set_state_dict(self, state_dict, use_structured_name=True):
'''
Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
Default: True
Returns:
......
......@@ -621,16 +621,12 @@ class DataParallel(layers.Layer):
structured_name_prefix=structured_name_prefix)
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
def set_state_dict(self, state_dict, use_structured_name=True):
'''
Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
Default: True
Returns:
......@@ -656,9 +652,7 @@ class DataParallel(layers.Layer):
'''
self._layers.set_state_dict(
state_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
state_dict, use_structured_name=use_structured_name)
# [aliases] Compatible with old method names
set_dict = set_state_dict
......
......@@ -54,7 +54,7 @@ class PrePostProcessLayer(Layer):
self.functors.append(
self.add_sublayer(
"layer_norm_%d" % len(
self.sublayers(include_sublayers=False)),
[layer for layer in self.children()]),
LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
......
......@@ -472,7 +472,7 @@ class TestImperative(unittest.TestCase):
self.assertEqual("linear_1.b_0", params[3].name)
self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True)
sublayers = mlp.sublayers()
self.assertEqual(mlp._linear1, sublayers[0])
self.assertEqual(mlp._linear2, sublayers[1])
self.assertEqual(len(sublayers), 2)
......
......@@ -52,10 +52,6 @@ class TestImperativeNamedSubLayers(unittest.TestCase):
list_sublayers):
self.assertEqual(sublayer, expected_sublayer)
for name, sublayer in model.named_sublayers(
include_sublayers=False):
self.assertEqual(model[name], sublayer)
self.assertListEqual(
[l for _, l in list(model.named_sublayers(include_self=True))],
[model] + expected_sublayers)
......@@ -71,7 +67,7 @@ class TestImperativeNamedParameters(unittest.TestCase):
named_parameters = list(model.named_parameters())
expected_named_parameters = list()
for prefix, layer in model.named_sublayers(include_sublayers=True):
for prefix, layer in model.named_sublayers():
for name, param in layer.named_parameters(
include_sublayers=False):
full_name = prefix + ('.' if prefix else '') + name
......
......@@ -3718,6 +3718,36 @@ class TestLayerTrainingAttribute(unittest.TestCase):
self.assertFalse(net.training)
class MyLayer(paddle.nn.Layer):
    """A small two-sublayer network: Linear(1, 1) followed by Dropout(p=0.5)."""

    def __init__(self):
        super(MyLayer, self).__init__()
        # Register two sublayers; dropout probability is fixed at 0.5.
        self._linear = paddle.nn.Linear(1, 1)
        self._dropout = paddle.nn.Dropout(p=0.5)

    def forward(self, input):
        # Linear projection, then dropout, in a single expression.
        return self._dropout(self._linear(input))
class MySuperLayer(paddle.nn.Layer):
    """A wrapper layer holding a single nested ``MyLayer`` sublayer."""

    def __init__(self):
        super(MySuperLayer, self).__init__()
        self._mylayer = MyLayer()

    def forward(self, input):
        # Delegate the whole forward pass to the nested layer.
        return self._mylayer(input)
class TestSubLayerCount(unittest.TestCase):
    """Checks ``Layer.sublayers()`` counts with and without ``include_self``."""

    def test_sublayer(self):
        with fluid.dygraph.guard():
            my_super_layer = MySuperLayer()
            # MySuperLayer's sublayers are: MyLayer plus its Linear and
            # Dropout children -> 3 in total.
            # assertEqual reports both values on failure, unlike
            # assertTrue(len(...) == n) which only prints "False is not true".
            self.assertEqual(len(my_super_layer.sublayers()), 3)
            # include_self=True additionally yields the layer itself -> 4.
            self.assertEqual(
                len(my_super_layer.sublayers(include_self=True)), 4)
if __name__ == '__main__':
    # NOTE(review): enable_static() appears to switch Paddle out of dynamic
    # (dygraph) mode before running the suite — presumably other tests in
    # this file require static-graph mode; confirm against the full file.
    paddle.enable_static()
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册