From 5717376379f10af0c1d59517bef2e1446e0abbfa Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Mon, 14 Oct 2019 08:53:51 +0800 Subject: [PATCH] Fix basic_gru and docs of gru_unit and dynamic_gru (#19393) (#20596) * Fix docs of gru_unit and dynamic_gru. Fix basic_gru in rnn_impl.py. Add error messages for param_attr setting in layer_norm api. Add int64 dtype for expand. test=develop * Reopen unit-tests of basic_gru/basic_lstm in rnn_impl.py. test=develop * Add unit test for layer_norm api. test=develop * Remove the deprecated gru doc fix. test=develop * Fix basic_gru test coverage. test=develop * Update API.spec. test=develop * Update API.spec. test=develop * Fix test_basic_gru coverage test. test=develop * Update test_basic_gru in test_layers to use fluid.data test=develop * Update test_basic_gru for coverage. test=develop --- paddle/fluid/API.spec | 4 +-- .../paddle/fluid/contrib/layers/rnn_impl.py | 14 ++++++-- python/paddle/fluid/layers/nn.py | 10 +++++- .../tests/unittests/test_basic_gru_api.py | 4 ++- .../tests/unittests/test_basic_gru_unit_op.py | 4 ++- .../tests/unittests/test_basic_lstm_api.py | 2 ++ .../unittests/test_basic_lstm_unit_op.py | 2 ++ .../tests/unittests/test_layer_norm_op.py | 33 +++++++++++++++++++ .../fluid/tests/unittests/test_layers.py | 23 +++++++++++++ 9 files changed, 89 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 256cfb784d5..ad51ca7c5df 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -581,7 +581,7 @@ paddle.fluid.contrib.BasicGRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', ' paddle.fluid.contrib.BasicGRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3')) paddle.fluid.contrib.BasicGRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340')) paddle.fluid.contrib.BasicGRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.contrib.basic_gru (ArgSpec(args=['input', 'init_hidden', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 'float32', 'basic_gru')), ('document', '0afcbe4fbe1b8c35eda58b4efe48f9fd')) +paddle.fluid.contrib.basic_gru (ArgSpec(args=['input', 'init_hidden', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 'float32', 'basic_gru')), ('document', 'a9930834e44da910282894449a6e8a6d')) paddle.fluid.contrib.BasicLSTMUnit ('paddle.fluid.contrib.layers.rnn_impl.BasicLSTMUnit', ('document', '3d0b2e3172ce58e1304199efee066c99')) paddle.fluid.contrib.BasicLSTMUnit.__init__ (ArgSpec(args=['self', 'name_scope', 'hidden_size', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype'], varargs=None, keywords=None, defaults=(None, None, None, None, 1.0, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicLSTMUnit.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, 
defaults=None), ('document', 'e75f4ab651bed0c9129a9a2c10aaaa7d'))
@@ -599,7 +599,7 @@ paddle.fluid.contrib.BasicLSTMUnit.set_dict (ArgSpec(args=['self', 'stat_dict',
paddle.fluid.contrib.BasicLSTMUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3'))
paddle.fluid.contrib.BasicLSTMUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340'))
paddle.fluid.contrib.BasicLSTMUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', 'fe4d0c3c55a162b8cfe10b05fabb7ce4'))
+paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', '2db201a8a2fb268097e220036dfe0b88'))
paddle.fluid.contrib.ctr_metric_bundle (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'b68d12366896c41065fc3738393da2aa'))
paddle.fluid.data (ArgSpec(args=['name', 'shape', 'dtype', 'lod_level'], varargs=None, keywords=None, defaults=('float32', 0)), ('document', 'a44fce9b5c8919bf5937a1cc0fe484ca'))
paddle.fluid.dygraph.Layer ('paddle.fluid.dygraph.layers.Layer', ('document', 'ec49de13e8d63aa20b51d24fc53a1d15'))
diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py
index e6a868ada37..cf656e7094c 100644
--- a/python/paddle/fluid/contrib/layers/rnn_impl.py
+++ b/python/paddle/fluid/contrib/layers/rnn_impl.py
@@ -127,7 +127,7 @@ class BasicGRUUnit(Layer):
         r_hidden = r * pre_hidden

         candidate = layers.matmul(
-            layers.concat([input, pre_hidden], 1), self._candidate_weight)
+            layers.concat([input, r_hidden], 1), self._candidate_weight)
         candidate = layers.elementwise_add(candidate, self._candidate_bias)

         c = self._activation(candidate)
@@ -179,6 +179,11 @@ def basic_gru(input,
        dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
            NOT between time steps
        bidirectional (bool|False): If it is bidirectional
+        batch_first (bool|True): The shape format of the input and output tensors. If true,
+            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
+            the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
+            this function accepts input and emits output in batch-major form to be consistent
+            with most data formats, though it is a bit less efficient because of the extra transposes.
        param_attr(ParamAttr|None): The parameter attribute for the learnable weight matrix.
            Note: If it is set to None or one attribute of ParamAttr, gru_unit will
@@ -345,7 +350,7 @@ def basic_gru(input,
         last_hidden = fw_last_hidden

     if batch_first:
-        rnn_out = fluid.layser.transpose(rnn_out, [1, 0, 2])
+        rnn_out = layers.transpose(rnn_out, [1, 0, 2])

     return rnn_out, last_hidden

@@ -404,6 +409,11 @@ def basic_lstm(input,
        dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
            NOT between time steps
        bidirectional (bool|False): If it is bidirectional
+        batch_first (bool|True): The shape format of the input and output tensors. If true,
+            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
+            the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
+            this function accepts input and emits output in batch-major form to be consistent
+            with most data formats, though it is a bit less efficient because of the extra transposes.
        param_attr(ParamAttr|None): The parameter attribute for the learnable weight matrix.
            Note: If it is set to None or one attribute of ParamAttr, lstm_unit will
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 22d493db884..653301ca9c1 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1519,6 +1519,7 @@ def gru_unit(input,
         attrs={
             'activation': 2,  # tanh
             'gate_activation': 1,  # sigmoid
+            'origin_mode': origin_mode
         })

     return updated_hidden, reset_hidden_pre, gate
@@ -4646,17 +4647,24 @@ def layer_norm(input,
     input_shape = input.shape
     param_shape = [reduce(lambda x, y: x * y, input_shape[begin_norm_axis:])]
     if scale:
+        assert param_attr is not False, "param_attr should not be False when using scale."
         scale = helper.create_parameter(
             attr=helper.param_attr,
             shape=param_shape,
             dtype=dtype,
             default_initializer=Constant(1.0))
         inputs['Scale'] = scale
+    else:
+        if param_attr:
+            warnings.warn("param_attr is only available when scale is True.")
     if shift:
-        assert bias_attr is not False
+        assert bias_attr is not False, "bias_attr should not be False when using shift."
        bias = helper.create_parameter(
            attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
        inputs['Bias'] = bias
+    else:
+        if bias_attr:
+            warnings.warn("bias_attr is only available when shift is True.")

     # create output
     mean_out = helper.create_variable_for_type_inference(
diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
index 37cf56bf19e..ee8a1b7af24 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
@@ -25,6 +25,8 @@ from paddle.fluid import framework

 import numpy as np

+np.random.seed(123)
+
 SIGMOID_THRESHOLD_MIN = -40.0
 SIGMOID_THRESHOLD_MAX = 13.0
 EXP_MAX_INPUT = 40.0
@@ -65,7 +67,7 @@ def gru_np(input,
         r_hidden = r * pre_hidden

         candidate = np.matmul(
-            np.concatenate([step_in, pre_hidden], 1), candidate_w)
+            np.concatenate([step_in, r_hidden], 1), candidate_w)
         candidate += candidate_b
         c = tanh(candidate)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
index 6c137f3cce8..597d8306b01 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
@@ -25,6 +25,8 @@ from paddle.fluid import framework

 import numpy as np

+np.random.seed(123)
+
 SIGMOID_THRESHOLD_MIN = -40.0
 SIGMOID_THRESHOLD_MAX = 13.0
 EXP_MAX_INPUT = 40.0
@@ -53,7 +55,7 @@ def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):

     r_hidden = r * pre_hidden

-    candidate = np.matmul(np.concatenate([step_in, pre_hidden], 1), candidate_w)
+    candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
     candidate += candidate_b
     c = tanh(candidate)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
index a09d6e79dad..5383632838d 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
@@ -25,6 +25,8 @@ from paddle.fluid import framework

 import numpy as np

+np.random.seed(123)
+
 SIGMOID_THRESHOLD_MIN = -40.0
 SIGMOID_THRESHOLD_MAX = 13.0
 EXP_MAX_INPUT = 40.0
diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
index b79219c26ac..b94ac1db665 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
@@ -25,6 +25,8 @@ from paddle.fluid import framework

 import numpy as np

+np.random.seed(123)
+
 SIGMOID_THRESHOLD_MIN = -40.0
 SIGMOID_THRESHOLD_MAX = 13.0
 EXP_MAX_INPUT = 40.0
diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
index ff68599dce6..d5724b6e3ca 100644
--- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
@@ -175,5 +175,38 @@ class TestLayerNormOp(unittest.TestCase):
         self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3)

+class TestLayerNormAPI(unittest.TestCase):
+    def test_case(self):
+        x = fluid.layers.data(
+            name='x',
+            shape=[64, 32, 256],
+            dtype='float32',
+            append_batch_size=False)
+        x = fluid.layers.layer_norm(
+            x,
+            scale=True,
+            shift=True,
+            begin_norm_axis=1,
+            epsilon=1e-05,
+            param_attr=None,
+            bias_attr=None)
+        x = fluid.layers.layer_norm(
+            x,
+            scale=False,
+            shift=False,
+            begin_norm_axis=1,
+            epsilon=1e-05,
+            param_attr=None,
+            bias_attr=None)
+        x = fluid.layers.layer_norm(
+            x,
+            scale=False,
+            shift=False,
+            begin_norm_axis=1,
+            epsilon=1e-05,
+            param_attr="scale",
+            bias_attr="shift")
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 2c06b3f3a69..4818c1f675b 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -3060,6 +3060,29 @@ class TestBook(LayerTest):
             evaluator = fluid.evaluator.EditDistance(predict, label)
             return evaluator.metrics

+    def test_basic_gru(self):
+        input_size = 128
+        hidden_size = 256
+        with self.static_graph():
+            input = fluid.data(
+                name="input", shape=[None, None, input_size], dtype='float32')
+            pre_hidden = fluid.data(
+                name="pre_hidden", shape=[None, hidden_size], dtype='float32')
+            sequence_length = fluid.data(
+                name="sequence_length", shape=[None], dtype='int32')
+
+            for bidirectional in [True, False]:
+                for batch_first in [True, False]:
+                    rnn_out, last_hidden = fluid.contrib.layers.basic_gru(
+                        input,
+                        pre_hidden,
+                        hidden_size=hidden_size,
+                        num_layers=2,
+                        sequence_length=sequence_length,
+                        dropout_prob=0.5,
+                        bidirectional=bidirectional,
+                        batch_first=batch_first)
+

 if __name__ == '__main__':
     unittest.main()
-- 
GitLab
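
Editor's usage notes follow; none of the code below is part of the patch. First, the core fix: in a GRU the reset gate must scale the previous hidden state before it enters the candidate term, c = tanh([x, r * h_prev] . W_c + b_c); the buggy code concatenated the raw pre_hidden instead. A minimal numpy sketch of one corrected step, mirroring the fixed step() helper in test_basic_gru_unit_op.py (gate split order as in BasicGRUUnit):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
    # Gates: one matmul over [x, h_prev], split into reset and update.
    concat_1 = np.concatenate([step_in, pre_hidden], 1)
    gate_input = sigmoid(np.matmul(concat_1, gate_w) + gate_b)
    r, u = np.split(gate_input, 2, axis=1)

    r_hidden = r * pre_hidden  # reset gate applied to the previous hidden state

    # The fix: the candidate sees r * pre_hidden, not the raw pre_hidden.
    candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
    c = np.tanh(candidate + candidate_b)

    return u * pre_hidden + (1.0 - u) * c

# Shape check: batch 2, input_size 4, hidden_size 3.
x = np.random.rand(2, 4)
h = np.zeros((2, 3))
gate_w, gate_b = np.random.rand(7, 6), np.zeros(6)  # 7 = 4 + 3, 6 = 2 * 3
cand_w, cand_b = np.random.rand(7, 3), np.zeros(3)
print(gru_step(x, h, gate_w, gate_b, cand_w, cand_b).shape)  # (2, 3)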
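
Second, the batch_first semantics documented for basic_gru/basic_lstm. A minimal graph-building sketch, mirroring the shapes of the new test_basic_gru case in test_layers.py and assuming a Paddle build that contains this patch:

import paddle.fluid as fluid

input_size, hidden_size = 128, 256

with fluid.program_guard(fluid.Program(), fluid.Program()):
    # batch_first=True (the default): input and output are batch-major,
    # i.e. [batch_size, seq_len, input_size/hidden_size].
    x = fluid.data(name="x", shape=[None, None, input_size], dtype='float32')
    pre_hidden = fluid.data(
        name="pre_hidden", shape=[None, hidden_size], dtype='float32')

    rnn_out, last_hidden = fluid.contrib.layers.basic_gru(
        x, pre_hidden, hidden_size=hidden_size, num_layers=1, batch_first=True)
    # With batch_first=False the same call expects and returns time-major
    # tensors, [seq_len, batch_size, hidden_size], and skips the extra
    # transposes mentioned in the docstring.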
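
Finally, the new layer_norm argument checks. A sketch of what callers observe (message strings as added above; the string-valued param_attr only exercises the truthiness check, as in TestLayerNormAPI):

import paddle.fluid as fluid

with fluid.program_guard(fluid.Program(), fluid.Program()):
    x = fluid.data(name="x", shape=[8, 32, 256], dtype='float32')

    # scale=False with a truthy param_attr: the attribute is ignored and the
    # new warning "param_attr is only available when scale is True." fires.
    y = fluid.layers.layer_norm(x, scale=False, shift=True, param_attr="scale")

    # scale=True with param_attr=False now fails fast with a clear message
    # instead of failing later inside create_parameter.
    try:
        fluid.layers.layer_norm(x, scale=True, param_attr=False)
    except AssertionError as e:
        print(e)  # param_attr should not be False when using scale.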