Unverified commit 69f5c0ee, authored by Xin Pan, committed by GitHub

Merge pull request #15557 from panyx0718/imperative

add sugar for fetching parameters and layers
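As an illustrative sketch of the new sugar (the MLP class, shapes, and expected counts below are not part of this commit; they only assume the behaviour introduced by this diff): parameters and sub-layers assigned as attributes of a fluid.imperative.Layer are registered automatically, so they can be fetched with Layer.parameters() and Layer.sublayers() instead of hand-written per-model overrides:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable


class MLP(fluid.imperative.Layer):
    def __init__(self):
        super(MLP, self).__init__()
        # Assigning Layer instances registers them as sub-layers
        # through the new Layer.__setattr__.
        self._fc1 = FC(size=10)
        self._fc2 = FC(size=4)

    def forward(self, inputs):
        return self._fc2(self._fc1(inputs))


with fluid.imperative.guard():
    mlp = MLP()
    mlp(to_variable(np.ones([2, 2], dtype='float32')))
    print(len(mlp.sublayers()))   # 2: _fc1 and _fc2
    print(len(mlp.parameters()))  # 4: weight and bias of each FC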
@@ -207,7 +207,7 @@ framework::LoDTensor& VarBase::GradValue() {
std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
  if (grad_op_descs_.empty() && backward_id_ <= 0) {
-    LOG(WARNING) << "op with no grad: " << op_desc_->Type();
+    VLOG(3) << "op with no grad: " << op_desc_->Type();
    return {};
  }
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import collections
import contextlib
import sys
import numpy as np
@@ -30,31 +31,45 @@ class Layer(core.Layer):
    def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
        self._built = False
        self._dtype = dtype
+        self._parameters = collections.OrderedDict()
+        self._sub_layers = collections.OrderedDict()

+    def parameters(self, include_sublayers=True):
+        """Returns a list of Parameters from current and sub-layers.
+
+        Args:
+            include_sublayers: If true, also include the parameters from
+            sublayers.
+
+        Returns a list of Parameters.
+        """
+        ret = [p for p in self._parameters.values()]
+        if include_sublayers:
+            for l in self._sub_layers.values():
+                for p in l.parameters(include_sublayers):
+                    ret.append(p)
+        return ret

-    def parameters(self):
-        params = []
-        for key in self.__dict__.keys():
-            value = self.__dict__[key]
-            if isinstance(value, framework.Parameter):
-                params.append(value)
-            elif isinstance(value, core.Layer):
-                params.extend(value.parameters())
-            elif isinstance(value, collections.Container):
-                if len(value) == 0:
-                    continue
-                if isinstance(value[0], framework.Parameter):
-                    params.extend(value)
-                elif isinstance(value[0], core.Layer):
-                    for v in value:
-                        params.extend(v.parameters())
-        return params
+    def sublayers(self, include_sublayers=True):
+        """Returns a list of sub layers.
+
+        Args:
+            include_sublayers: If true, also include the layers from sublayers.
+
+        Returns a list of sub layers.
+        """
+        ret = [l for l in self._sub_layers.values()]
+        if include_sublayers:
+            for l in self._sub_layers.values():
+                for sub_l in l.sublayers(include_sublayers):
+                    ret.append(sub_l)
+        return ret

    def clear_gradients(self):
        for p in self.parameters():
            p._clear_gradient()

-    def _build_once(self, inputs):
+    def _build_once(self, *args):
        pass

    def __call__(self, *inputs):
@@ -71,6 +86,66 @@ class Layer(core.Layer):
    def backward(self, *inputs):
        raise ValueError("Layer shouldn't implement backward")

+    def add_sublayer(self, name, sublayer):
+        """Adds a sub Layer instance.
+
+        Added sublayer can be accessed like self.name.
+
+        Args:
+            name: name of this sublayer.
+            sublayer: an instance of Layer.
+        Returns:
+            the sublayer passed in.
+        """
+        assert isinstance(sublayer, core.Layer)
+        self._sub_layers[name] = sublayer
+        return sublayer
+
+    def add_parameter(self, name, parameter):
+        """Adds a Parameter instance.
+
+        Added parameter can be accessed like self.name.
+
+        Args:
+            name: name of this parameter.
+            parameter: an instance of Parameter.
+        Returns:
+            the parameter passed in.
+        """
+        assert isinstance(parameter, framework.Parameter)
+        self._parameters[name] = parameter
+        return parameter
+
+    def __getattr__(self, name):
+        if name in self._parameters:
+            return self._parameters[name]
+        elif name in self._sub_layers:
+            return self._sub_layers[name]
+
+    def __setattr__(self, name, value):
+        if isinstance(value, framework.Parameter):
+            params = self.__dict__.get('_parameters', None)
+            if params is None:
+                raise ValueError(
+                    "super(YourLayer, self).__init__() should be called first")
+            params[name] = value
+        elif isinstance(value, core.Layer):
+            layers = self.__dict__.get('_sub_layers', None)
+            if layers is None:
+                raise ValueError(
+                    "super(YourLayer, self).__init__() should be called first")
+            layers[name] = value
+        else:
+            object.__setattr__(self, name, value)
+
+    def __delattr__(self, name):
+        if name in self._parameters:
+            del self._parameters[name]
+        elif name in self._sub_layers:
+            del self._sub_layers[name]
+        else:
+            object.__delattr__(self, name)


class PyLayer(core.PyLayer):
    """Layers composed of user-defined python codes."""
...
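A minimal sketch of the add_sublayer helper added above (the Stack class here is hypothetical); it mirrors the ResNet change later in this diff, where layers built in a loop are registered explicitly so that parameters() can still find them:

import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC


class Stack(fluid.imperative.Layer):
    def __init__(self, num_layers=3):
        super(Stack, self).__init__()
        self._stack = []
        for i in range(num_layers):
            # Layers appended to a plain Python list are not registered
            # automatically; add_sublayer records the FC under a name and
            # returns it, so it can still be kept in the list for forward().
            fc = self.add_sublayer('fc_%d' % i, FC(size=8))
            self._stack.append(fc)

    def forward(self, x):
        for fc in self._stack:
            x = fc(x)
        return x


with fluid.imperative.guard():
    stack = Stack()
    print(len(stack.sublayers()))   # 3
    # Assuming FC builds its weight and bias lazily on the first call
    # (see _build_once), parameters() is empty until a forward pass runs.
    print(len(stack.parameters()))  # 0 before the first forward pass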
@@ -225,9 +225,6 @@ class FC(layers.Layer):
            act=act,
            name=name)

-    def parameters(self):
-        return [self._w, self._b]
-
    def _build_once(self, input):
        input_shape = input.shape
        param_shape = [
@@ -478,9 +475,6 @@ class Embedding(layers.Layer):
            dtype=self._dtype,
            is_bias=False)

-    def parameters(self):
-        return [self._w]
-
    def forward(self, input):
        out = self._helper.create_variable_for_type_inference(self._dtype)
        self._helper.append_op(
...
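With the per-layer parameters() overrides above removed, built-in layers such as FC and Embedding rely on the base Layer to collect whatever their build step registers. A minimal sketch of the expected behaviour (assuming FC's default arguments; the printed names follow the pattern asserted in the tests below):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    fc = FC(size=4)
    # The first call builds the layer, creating and registering its
    # weight and bias through Layer.__setattr__.
    fc(to_variable(np.ones([3, 8], dtype='float32')))
    print([p.name for p in fc.parameters()])  # e.g. ['FC_0.w_0', 'FC_0.b_0']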
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
class L1(fluid.imperative.Layer):
    def __init__(self):
        super(L1, self).__init__()
        self._helper = LayerHelper(
            'MyLayer',
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))

        self.w1 = self._helper.create_parameter(
            attr=self._helper.param_attr,
            shape=[2, 2],
            dtype='float32',
            is_bias=False)
        self.w2 = self._helper.create_parameter(
            attr=self._helper.param_attr,
            shape=[2, 2],
            dtype='float32',
            is_bias=False)

    def forward(self):
        return self.w1 + self.w2


class L2(fluid.imperative.Layer):
    def __init__(self):
        super(L2, self).__init__()
        self.layer1 = L1()
        self.layer2 = L1()

    def forward(self):
        return self.layer1() + self.layer2()


class L3(fluid.imperative.Layer):
    def __init__(self):
        super(L3, self).__init__()
        self.layer1 = L2()
        self.layer2 = L2()

    def forward(self):
        return self.layer1() + self.layer2()


class TestBaseLayer(unittest.TestCase):
    def test_one_level(self):
        with fluid.imperative.guard():
            l = L1()
            ret = l()
            self.assertEqual(l.w1.name, "MyLayer_0.w_0")
            self.assertEqual(l.w2.name, "MyLayer_0.w_1")
            self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))

    def test_three_level(self):
        with fluid.imperative.guard():
            l = L3()
            ret = l()
            self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2])))


if __name__ == '__main__':
    unittest.main()
@@ -333,6 +333,18 @@ class TestImperative(unittest.TestCase):
        self.assertTrue(np.allclose(dy_out, static_out))
        self.assertTrue(np.allclose(dy_grad, static_grad))

+        params = mlp.parameters(True)
+        self.assertEqual("FC_0.w_0", params[0].name)
+        self.assertEqual("FC_0.b_0", params[1].name)
+        self.assertEqual("FC_1.w_0", params[2].name)
+        self.assertEqual("FC_1.b_0", params[3].name)
+        self.assertEqual(len(params), 4)
+
+        sublayers = mlp.sublayers(True)
+        self.assertEqual(mlp._fc1, sublayers[0])
+        self.assertEqual(mlp._fc2, sublayers[1])
+        self.assertEqual(len(sublayers), 2)
+
    def test_rnn(self):
        np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                           [10.0, 11.0, 12.0]])
...
@@ -33,9 +33,6 @@ class Discriminator(fluid.imperative.Layer):
        self._fc1 = FC(size=32, act='elu', name="d_fc1")
        self._fc2 = FC(size=1, name="d_fc2")

-    def parameters(self):
-        return self._fc1.parameters() + self._fc2.parameters()
-
    def forward(self, inputs):
        x = self._fc1(inputs)
        return self._fc2(x)
@@ -48,10 +45,6 @@ class Generator(fluid.imperative.Layer):
        self._fc2 = FC(size=64, act='elu', name="g_fc2")
        self._fc3 = FC(size=1, name="g_fc3")

-    def parameters(self):
-        return self._fc1.parameters() + self._fc2.parameters(
-        ) + self._fc3.parameters()
-
    def forward(self, inputs):
        x = self._fc1(inputs)
        x = self._fc2(x)
...
@@ -75,16 +75,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
            self.hidden_array.append(pre_hidden)
            self.cell_array.append(pre_cell)

-    def parameters(self):
-        parameters = list()
-        for param in self.weight_1_arr:
-            parameters.append(param)
-        for param in self.weight_2_arr:
-            parameters.append(param)
-        for bias in self.bias_arr:
-            parameters.append(bias)
-        return parameters
-
    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        res = []
        for index in range(self._num_steps):
@@ -177,12 +167,6 @@ class PtbModel(fluid.imperative.Layer):
    def _build_once(self, input, label, init_hidden, init_cell):
        pass

-    def parameters(self):
-        parameters = self.simple_lstm_rnn.parameters() + [
-            self.softmax_weight, self.softmax_bias
-        ] + self.embedding.parameters()
-        return parameters
-
    def forward(self, input, label, init_hidden, init_cell):
        init_h = fluid.layers.reshape(
...
@@ -21,7 +21,6 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope
@@ -173,11 +172,13 @@ class ResNet(fluid.imperative.Layer):
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
-                bottleneck_block = BottleneckBlock(
-                    num_channels=num_channels,
-                    num_filters=num_filters[block],
-                    stride=2 if i == 0 and block != 0 else 1,
-                    shortcut=shortcut)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels,
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        shortcut=shortcut))
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True
@@ -223,8 +224,7 @@ class TestImperativeResnet(unittest.TestCase):
                batch_size=batch_size)

            dy_param_init_value = {}
-            for param in fluid.default_main_program().global_block(
-            ).all_parameters():
+            for param in resnet.parameters():
                dy_param_init_value[param.name] = param._numpy()

            for batch_id, data in enumerate(train_reader()):
@@ -247,16 +247,14 @@ class TestImperativeResnet(unittest.TestCase):
                dy_out = avg_loss._numpy()

                if batch_id == 0:
-                    for param in fluid.default_main_program().global_block(
-                    ).all_parameters():
+                    for param in resnet.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param._numpy()

                avg_loss._backward()

                dy_grad_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
+                for param in resnet.parameters():
                    if not param.stop_gradient:
                        np_array = np.array(param._ivar._grad_ivar().value()
                                            .get_tensor())
@@ -267,8 +265,7 @@ class TestImperativeResnet(unittest.TestCase):
                resnet.clear_gradients()

                dy_param_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
+                for param in resnet.parameters():
                    dy_param_value[param.name] = param._numpy()

        with new_program_scope():
@@ -349,6 +346,7 @@ class TestImperativeResnet(unittest.TestCase):
        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value.all()))
...