未验证 提交 57173763 编写于 作者: G Guo Sheng 提交者: GitHub

Fix basic_gru and docs of gru_unit and dynamic_gru (#19393) (#20596)

* Fix docs of gru_unit and dynamic_gru.

Fix basic_gru in rnn_impl.py.

Add error messages for param_attr setting in layer_norm api.

Add int64 dtype for expand.

test=develop

* Reopen unit-tests of basic_gru/basic_lstm in rnn_impl.py.
test=develop

* Add unit test for layer_norm api.
test=develop

* Remove the deprecated gru doc fix. test=develop

* Fix basic_gru test coverage. test=develop

* Update API.spec. test=develop

* Update API.spec. test=develop

* Fix test_basic_gru coverage test. test=develop

* Update test_basic_gru in test_layers to use fluid.data
test=develop

* Update test_basic_gru for coverage. test=develop
上级 de470ff9
...@@ -581,7 +581,7 @@ paddle.fluid.contrib.BasicGRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', ' ...@@ -581,7 +581,7 @@ paddle.fluid.contrib.BasicGRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', '
paddle.fluid.contrib.BasicGRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3')) paddle.fluid.contrib.BasicGRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3'))
paddle.fluid.contrib.BasicGRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340')) paddle.fluid.contrib.BasicGRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340'))
paddle.fluid.contrib.BasicGRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicGRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.basic_gru (ArgSpec(args=['input', 'init_hidden', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 'float32', 'basic_gru')), ('document', '0afcbe4fbe1b8c35eda58b4efe48f9fd')) paddle.fluid.contrib.basic_gru (ArgSpec(args=['input', 'init_hidden', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 'float32', 'basic_gru')), ('document', 'a9930834e44da910282894449a6e8a6d'))
paddle.fluid.contrib.BasicLSTMUnit ('paddle.fluid.contrib.layers.rnn_impl.BasicLSTMUnit', ('document', '3d0b2e3172ce58e1304199efee066c99')) paddle.fluid.contrib.BasicLSTMUnit ('paddle.fluid.contrib.layers.rnn_impl.BasicLSTMUnit', ('document', '3d0b2e3172ce58e1304199efee066c99'))
paddle.fluid.contrib.BasicLSTMUnit.__init__ (ArgSpec(args=['self', 'name_scope', 'hidden_size', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype'], varargs=None, keywords=None, defaults=(None, None, None, None, 1.0, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicLSTMUnit.__init__ (ArgSpec(args=['self', 'name_scope', 'hidden_size', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype'], varargs=None, keywords=None, defaults=(None, None, None, None, 1.0, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.BasicLSTMUnit.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'e75f4ab651bed0c9129a9a2c10aaaa7d')) paddle.fluid.contrib.BasicLSTMUnit.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'e75f4ab651bed0c9129a9a2c10aaaa7d'))
...@@ -599,7 +599,7 @@ paddle.fluid.contrib.BasicLSTMUnit.set_dict (ArgSpec(args=['self', 'stat_dict', ...@@ -599,7 +599,7 @@ paddle.fluid.contrib.BasicLSTMUnit.set_dict (ArgSpec(args=['self', 'stat_dict',
paddle.fluid.contrib.BasicLSTMUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3')) paddle.fluid.contrib.BasicLSTMUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'bb62a4e57bc58f171091fe78b1e7f7f3'))
paddle.fluid.contrib.BasicLSTMUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340')) paddle.fluid.contrib.BasicLSTMUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c960169ad78b8306f5dc16d47e609340'))
paddle.fluid.contrib.BasicLSTMUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicLSTMUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', 'fe4d0c3c55a162b8cfe10b05fabb7ce4')) paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', '2db201a8a2fb268097e220036dfe0b88'))
paddle.fluid.contrib.ctr_metric_bundle (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'b68d12366896c41065fc3738393da2aa')) paddle.fluid.contrib.ctr_metric_bundle (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'b68d12366896c41065fc3738393da2aa'))
paddle.fluid.data (ArgSpec(args=['name', 'shape', 'dtype', 'lod_level'], varargs=None, keywords=None, defaults=('float32', 0)), ('document', 'a44fce9b5c8919bf5937a1cc0fe484ca')) paddle.fluid.data (ArgSpec(args=['name', 'shape', 'dtype', 'lod_level'], varargs=None, keywords=None, defaults=('float32', 0)), ('document', 'a44fce9b5c8919bf5937a1cc0fe484ca'))
paddle.fluid.dygraph.Layer ('paddle.fluid.dygraph.layers.Layer', ('document', 'ec49de13e8d63aa20b51d24fc53a1d15')) paddle.fluid.dygraph.Layer ('paddle.fluid.dygraph.layers.Layer', ('document', 'ec49de13e8d63aa20b51d24fc53a1d15'))
......
...@@ -127,7 +127,7 @@ class BasicGRUUnit(Layer): ...@@ -127,7 +127,7 @@ class BasicGRUUnit(Layer):
r_hidden = r * pre_hidden r_hidden = r * pre_hidden
candidate = layers.matmul( candidate = layers.matmul(
layers.concat([input, pre_hidden], 1), self._candidate_weight) layers.concat([input, r_hidden], 1), self._candidate_weight)
candidate = layers.elementwise_add(candidate, self._candidate_bias) candidate = layers.elementwise_add(candidate, self._candidate_bias)
c = self._activation(candidate) c = self._activation(candidate)
...@@ -179,6 +179,11 @@ def basic_gru(input, ...@@ -179,6 +179,11 @@ def basic_gru(input,
dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer, dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
NOT between time steps NOT between time steps
bidirectional (bool|False): If it is bidirectional bidirectional (bool|False): If it is bidirectional
batch_first (bool|True): The shape format of the input and output tensors. If true,
the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
this function accepts input and emits output in batch-major form to be consistent
with most data formats, though this is a bit less efficient because of the extra transposes.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note: weight matrix. Note:
If it is set to None or one attribute of ParamAttr, gru_unit will If it is set to None or one attribute of ParamAttr, gru_unit will
...@@ -345,7 +350,7 @@ def basic_gru(input, ...@@ -345,7 +350,7 @@ def basic_gru(input,
last_hidden = fw_last_hidden last_hidden = fw_last_hidden
if batch_first: if batch_first:
rnn_out = fluid.layser.transpose(rnn_out, [1, 0, 2]) rnn_out = layers.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden return rnn_out, last_hidden
...@@ -404,6 +409,11 @@ def basic_lstm(input, ...@@ -404,6 +409,11 @@ def basic_lstm(input,
dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer, dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
NOT between time steps NOT between time steps
bidirectional (bool|False): If it is bidirectional bidirectional (bool|False): If it is bidirectional
batch_first (bool|True): The shape format of the input and output tensors. If true,
the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
this function accepts input and emits output in batch-major form to be consistent
with most data formats, though this is a bit less efficient because of the extra transposes.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note: weight matrix. Note:
If it is set to None or one attribute of ParamAttr, lstm_unit will If it is set to None or one attribute of ParamAttr, lstm_unit will
......
...@@ -1519,6 +1519,7 @@ def gru_unit(input, ...@@ -1519,6 +1519,7 @@ def gru_unit(input,
attrs={ attrs={
'activation': 2, # tanh 'activation': 2, # tanh
'gate_activation': 1, # sigmoid 'gate_activation': 1, # sigmoid
'origin_mode': origin_mode
}) })
return updated_hidden, reset_hidden_pre, gate return updated_hidden, reset_hidden_pre, gate
...@@ -4646,17 +4647,24 @@ def layer_norm(input, ...@@ -4646,17 +4647,24 @@ def layer_norm(input,
input_shape = input.shape input_shape = input.shape
param_shape = [reduce(lambda x, y: x * y, input_shape[begin_norm_axis:])] param_shape = [reduce(lambda x, y: x * y, input_shape[begin_norm_axis:])]
if scale: if scale:
assert param_attr is not False, "param_attr should not be False when using scale."
scale = helper.create_parameter( scale = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr,
shape=param_shape, shape=param_shape,
dtype=dtype, dtype=dtype,
default_initializer=Constant(1.0)) default_initializer=Constant(1.0))
inputs['Scale'] = scale inputs['Scale'] = scale
else:
if param_attr:
warnings.warn("param_attr is only avaliable with scale is True.")
if shift: if shift:
assert bias_attr is not False assert bias_attr is not False, "bias_attr should not be False when using shift."
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
inputs['Bias'] = bias inputs['Bias'] = bias
else:
if bias_attr:
warnings.warn("bias_attr is only avaliable with shift is True.")
# create output # create output
mean_out = helper.create_variable_for_type_inference( mean_out = helper.create_variable_for_type_inference(
......
...@@ -25,6 +25,8 @@ from paddle.fluid import framework ...@@ -25,6 +25,8 @@ from paddle.fluid import framework
import numpy as np import numpy as np
# Seed NumPy's global random generator with a fixed value for reproducibility.
# NumPy exposes this as `np.random.seed`; there is no top-level `np.set_seed`,
# so the previous spelling raised AttributeError at import time.
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0 SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0 EXP_MAX_INPUT = 40.0
...@@ -65,7 +67,7 @@ def gru_np(input, ...@@ -65,7 +67,7 @@ def gru_np(input,
r_hidden = r * pre_hidden r_hidden = r * pre_hidden
candidate = np.matmul( candidate = np.matmul(
np.concatenate([step_in, pre_hidden], 1), candidate_w) np.concatenate([step_in, r_hidden], 1), candidate_w)
candidate += candidate_b candidate += candidate_b
c = tanh(candidate) c = tanh(candidate)
......
...@@ -25,6 +25,8 @@ from paddle.fluid import framework ...@@ -25,6 +25,8 @@ from paddle.fluid import framework
import numpy as np import numpy as np
# Seed NumPy's global random generator with a fixed value for reproducibility.
# NumPy exposes this as `np.random.seed`; there is no top-level `np.set_seed`,
# so the previous spelling raised AttributeError at import time.
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0 SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0 EXP_MAX_INPUT = 40.0
...@@ -53,7 +55,7 @@ def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b): ...@@ -53,7 +55,7 @@ def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
r_hidden = r * pre_hidden r_hidden = r * pre_hidden
candidate = np.matmul(np.concatenate([step_in, pre_hidden], 1), candidate_w) candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
candidate += candidate_b candidate += candidate_b
c = tanh(candidate) c = tanh(candidate)
......
...@@ -25,6 +25,8 @@ from paddle.fluid import framework ...@@ -25,6 +25,8 @@ from paddle.fluid import framework
import numpy as np import numpy as np
# Seed NumPy's global random generator with a fixed value for reproducibility.
# NumPy exposes this as `np.random.seed`; there is no top-level `np.set_seed`,
# so the previous spelling raised AttributeError at import time.
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0 SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0 EXP_MAX_INPUT = 40.0
......
...@@ -25,6 +25,8 @@ from paddle.fluid import framework ...@@ -25,6 +25,8 @@ from paddle.fluid import framework
import numpy as np import numpy as np
# Seed NumPy's global random generator with a fixed value for reproducibility.
# NumPy exposes this as `np.random.seed`; there is no top-level `np.set_seed`,
# so the previous spelling raised AttributeError at import time.
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0 SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0 EXP_MAX_INPUT = 40.0
......
...@@ -175,5 +175,38 @@ class TestLayerNormOp(unittest.TestCase): ...@@ -175,5 +175,38 @@ class TestLayerNormOp(unittest.TestCase):
self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3)
class TestLayerNormAPI(unittest.TestCase):
    """Smoke test that builds ``fluid.layers.layer_norm`` with several argument combinations."""

    def test_case(self):
        """Chain three ``layer_norm`` calls, feeding each output into the next.

        The calls cover, in order: scale and shift enabled with default
        attributes; scale and shift disabled with default attributes; and
        scale and shift disabled while ``param_attr``/``bias_attr`` are still
        supplied (presumably to exercise the "attribute given but unused"
        path of ``layer_norm`` -- confirm against its implementation).
        No values are asserted; the test only checks that the calls run.
        """
        out = fluid.layers.data(
            name='x',
            shape=[64, 32, 256],
            dtype='float32',
            append_batch_size=False)

        # One (scale, shift, param_attr, bias_attr) tuple per successive call.
        configurations = (
            (True, True, None, None),
            (False, False, None, None),
            (False, False, "scale", "shift"),
        )
        for use_scale, use_shift, weight_attr, offset_attr in configurations:
            out = fluid.layers.layer_norm(
                out,
                scale=use_scale,
                shift=use_shift,
                begin_norm_axis=1,
                epsilon=1e-05,
                param_attr=weight_attr,
                bias_attr=offset_attr)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -3060,6 +3060,29 @@ class TestBook(LayerTest): ...@@ -3060,6 +3060,29 @@ class TestBook(LayerTest):
evaluator = fluid.evaluator.EditDistance(predict, label) evaluator = fluid.evaluator.EditDistance(predict, label)
return evaluator.metrics return evaluator.metrics
def test_basic_gru(self):
    """Build ``basic_gru`` in a static graph for every flag combination.

    Declares the input, initial hidden state and sequence-length
    placeholders once, then constructs a two-layer GRU with dropout for
    each (bidirectional, batch_first) pair. Nothing is asserted about the
    outputs; the test only checks that graph construction runs.
    """
    input_size = 128
    hidden_size = 256
    with self.static_graph():
        seq_input = fluid.data(
            name="input", shape=[None, None, input_size], dtype='float32')
        init_hidden = fluid.data(
            name="pre_hidden", shape=[None, hidden_size], dtype='float32')
        seq_lengths = fluid.data(
            name="sequence_length", shape=[None], dtype='int32')

        # Same visiting order as nesting `bidirectional` (outer) over
        # `batch_first` (inner), each iterating [True, False].
        flag_pairs = [(True, True), (True, False), (False, True),
                      (False, False)]
        for bidirectional, batch_first in flag_pairs:
            # Unpacking keeps the check that two values are returned.
            _rnn_out, _last_hidden = fluid.contrib.layers.basic_gru(
                seq_input,
                init_hidden,
                hidden_size=hidden_size,
                num_layers=2,
                sequence_length=seq_lengths,
                dropout_prob=0.5,
                bidirectional=bidirectional,
                batch_first=batch_first)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册