From 89d09b831900bfb84e3f376da2b61fd64389ca35 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Mon, 8 Apr 2019 14:00:33 +0800 Subject: [PATCH] Cherry pick 1.4/ptb fix (#16607) * test=develop, ptb_rnn fix op * test=release/1.4, refine code * test=release/1.4, fix ci failed error --- .../fluid/dygraph/layer_object_helper.py | 4 + python/paddle/fluid/dygraph/nn.py | 97 ++++++++++++------- .../unittests/test_imperative_ptb_rnn.py | 14 +-- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py index c56652e103c..f8e607aab84 100644 --- a/python/paddle/fluid/dygraph/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -91,6 +91,10 @@ class LayerObjectHelper(LayerHelperBase): Returns input, param_attr """ + param_attr_in = ParamAttr._to_attr(param_attr_in) + if isinstance(param_attr_in, bool): + raise ValueError('Param_attr should not be False in {}'.format( + self.name)) inputs = inputs_in if (inputs_in is not None) else [] inputs = self._multiple_input(inputs) param_attrs = self._multiple_param_attr(len(inputs), param_attr_in) diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 89253811192..04da8561a37 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -20,7 +20,7 @@ import numpy as np from .. import core from ..layers import utils from . import layers -from ..framework import Variable, OpProtoHolder +from ..framework import Variable, OpProtoHolder, Parameter from ..layers import layer_function_generator from ..param_attr import ParamAttr from ..initializer import Normal, Constant, NumpyArrayInitializer @@ -213,46 +213,69 @@ class FC(layers.Layer): self._param_attr = param_attr self._bias_attr = bias_attr self._act = act + self.__w = list() - def _build_once(self, input): - input_shape = input.shape - param_shape = [ - reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1) - ] + [self._size] - self._w = self.create_parameter( - attr=self._param_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=False) + @property + def _w(self, i=0): + return self.__w[i] - if self._bias_attr: - size = list([self._size]) - self._b = self.create_parameter( - attr=self._bias_attr, - shape=size, - dtype=self._dtype, - is_bias=True) - else: - self._b = None + @_w.setter + def _w(self, value, i=0): + assert isinstance(value, Parameter) + self.__w[i] = value - def forward(self, input): - tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="mul", - inputs={"X": input, - "Y": self._w}, - outputs={"Out": tmp}, - attrs={ - "x_num_col_dims": self._num_flatten_dims, - "y_num_col_dims": 1 - }) + def _build_once(self, input): + i = 0 + for inp, param in self._helper.iter_inputs_and_params(input, + self._param_attr): + input_shape = inp.shape + + param_shape = [ + reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], + 1) + ] + [self._size] + self.__w.append( + self.add_parameter( + '_w%d' % i, + self.create_parameter( + attr=param, + shape=param_shape, + dtype=self._dtype, + is_bias=False))) + i += 1 + + size = list([self._size]) + self._b = self.create_parameter( + attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) - pre_bias = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="sum", - inputs={"X": [tmp]}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) + def forward(self, input): + mul_results = list() + i = 0 + for inp, param in self._helper.iter_inputs_and_params(input, + self._param_attr): + tmp = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type="mul", + inputs={"X": inp, + "Y": self.__w[i]}, + outputs={"Out": tmp}, + attrs={ + "x_num_col_dims": self._num_flatten_dims, + "y_num_col_dims": 1 + }) + i += 1 + mul_results.append(tmp) + + if len(mul_results) == 1: + pre_bias = mul_results[0] + else: + pre_bias = self._helper.create_variable_for_type_inference( + self._dtype) + self._helper.append_op( + type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": False}) if self._b: pre_activation = self._helper.create_variable_for_type_inference( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 998c675815e..552eb019500 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -200,8 +200,6 @@ class PtbModel(fluid.dygraph.Layer): rnn_out, shape=[-1, self.num_steps, self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) projection = fluid.layers.reshape( projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( @@ -223,6 +221,7 @@ class TestDygraphPtbRnn(unittest.TestCase): num_steps = 3 init_scale = 0.1 batch_size = 4 + batch_num = 200 with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed @@ -242,7 +241,6 @@ class TestDygraphPtbRnn(unittest.TestCase): dy_loss = None last_hidden = None last_cell = None - batch_num = 200 for i in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -282,7 +280,8 @@ class TestDygraphPtbRnn(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data(name="x", shape=[-1, 3, 1], dtype='int64') + x = fluid.layers.data( + name="x", shape=[-1, num_steps, 1], dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') init_hidden = fluid.layers.data( name="init_hidden", shape=[1], dtype='float32') @@ -332,7 +331,6 @@ class TestDygraphPtbRnn(unittest.TestCase): for k in range(3, len(out)): static_param_updated[static_param_name_list[k - 3]] = out[k] - self.assertTrue(np.allclose(static_loss_value, dy_loss._numpy())) self.assertTrue(np.allclose(static_last_cell_value, last_cell._numpy())) self.assertTrue( @@ -340,13 +338,11 @@ class TestDygraphPtbRnn(unittest.TestCase): for key, value in six.iteritems(static_param_init): # print("static_init name: {}, value {}".format(key, value)) # print("dy_init name: {}, value {}".format(key, dy_param_init[key])) - self.assertTrue(np.allclose(value, dy_param_init[key], atol=1e-5)) + self.assertTrue(np.allclose(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): # print("static name: {}, value {}".format(key, value)) # print("dy name: {}, value {}".format(key, dy_param_updated[key])) - self.assertTrue( - np.allclose( - value, dy_param_updated[key], atol=1e-5)) + self.assertTrue(np.allclose(value, dy_param_updated[key])) if __name__ == '__main__': -- GitLab