From e7bbad6c4985bd1179acacf49714e4011382bcba Mon Sep 17 00:00:00 2001
From: Guo Sheng
Date: Mon, 10 Feb 2020 16:04:41 +0800
Subject: [PATCH] Fix the leaving out of rnn_memory_helper_grad's output vars.
 test=develop (#22499)

---
 python/paddle/fluid/backward.py               |   2 +-
 .../tests/unittests/test_rnn_cell_api.py      | 165 ++++++++++++++++++
 2 files changed, 166 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 966d4611c0..bbe83493d7 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -1038,7 +1038,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
                     '''
                     if op_desc.type() not in ['rnn_memory_helper_grad']:
                         ops_to_remove.append(op_idx)
-                    continue
+                        continue
 
         new_vars = set()
         # create new gradient variables
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
index f553b55ddd..7b9e23881f 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
@@ -245,5 +245,170 @@ class TestRnnUtil(unittest.TestCase):
         pass
 
 
+class EncoderCell(RNNCell):
+    """Encoder Cell"""
+
+    def __init__(
+            self,
+            num_layers,
+            hidden_size,
+            dropout_prob=0.,
+            init_scale=0.1, ):
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.dropout_prob = dropout_prob
+        self.lstm_cells = []
+        for i in range(num_layers):
+            self.lstm_cells.append(LSTMCell(hidden_size))
+
+    def call(self, step_input, states):
+        new_states = []
+        for i in range(self.num_layers):
+            out, new_state = self.lstm_cells[i](step_input, states[i])
+            step_input = layers.dropout(
+                out,
+                self.dropout_prob, ) if self.dropout_prob else out
+            new_states.append(new_state)
+        return step_input, new_states
+
+    @property
+    def state_shape(self):
+        return [cell.state_shape for cell in self.lstm_cells]
+
+
+class DecoderCell(RNNCell):
+    """Decoder Cell"""
+
+    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.dropout_prob = dropout_prob
+        self.lstm_cells = []
+        for i in range(num_layers):
+            self.lstm_cells.append(LSTMCell(hidden_size))
+
+    def call(self, step_input, states):
+        new_lstm_states = []
+        for i in range(self.num_layers):
+            out, new_lstm_state = self.lstm_cells[i](step_input, states[i])
+            step_input = layers.dropout(
+                out,
+                self.dropout_prob, ) if self.dropout_prob else out
+            new_lstm_states.append(new_lstm_state)
+        return step_input, new_lstm_states
+
+
+def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
+                      trg_vocab_size):
+    "vanilla seq2seq model"
+    # data
+    source = fluid.data(name="src", shape=[None, None], dtype="int64")
+    source_length = fluid.data(
+        name="src_sequence_length", shape=[None], dtype="int64")
+    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
+    target_length = fluid.data(
+        name="trg_sequence_length", shape=[None], dtype="int64")
+    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")
+
+    # embedding
+    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
+    tar_emb = fluid.embedding(target, (src_vocab_size, hidden_size))
+
+    # encoder
+    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
+    enc_output, enc_final_state = dynamic_rnn(
+        cell=enc_cell, inputs=src_emb, sequence_length=source_length)
+
+    # decoder
+    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
+    dec_output, dec_final_state = dynamic_rnn(
+        cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state)
+    logits = layers.fc(dec_output,
+                       size=trg_vocab_size,
+                       num_flatten_dims=len(dec_output.shape) - 1,
+                       bias_attr=False)
+
+    # loss
+    loss = layers.softmax_with_cross_entropy(
+        logits=logits, label=label, soft_label=False)
+    loss = layers.unsqueeze(loss, axes=[2])
+    max_tar_seq_len = layers.shape(target)[1]
+    tar_mask = layers.sequence_mask(
+        target_length, maxlen=max_tar_seq_len, dtype="float")
+    loss = loss * tar_mask
+    loss = layers.reduce_mean(loss, dim=[0])
+    loss = layers.reduce_sum(loss)
+
+    # optimizer
+    optimizer = fluid.optimizer.Adam(0.001)
+    optimizer.minimize(loss)
+    return loss
+
+
+class TestSeq2SeqModel(unittest.TestCase):
+    """
+    Test cases to confirm seq2seq api training correctly.
+    """
+
+    def setUp(self):
+        np.random.seed(123)
+        self.model_hparams = {
+            "num_layers": 2,
+            "hidden_size": 128,
+            "dropout_prob": 0.1,
+            "src_vocab_size": 100,
+            "trg_vocab_size": 100
+        }
+
+        self.iter_num = iter_num = 2
+        self.batch_size = batch_size = 4
+        src_seq_len = 10
+        trg_seq_len = 12
+        self.data = {
+            "src": np.random.randint(
+                2, self.model_hparams["src_vocab_size"],
+                (iter_num * batch_size, src_seq_len)).astype("int64"),
+            "src_sequence_length": np.random.randint(
+                1, src_seq_len, (iter_num * batch_size, )).astype("int64"),
+            "trg": np.random.randint(
+                2, self.model_hparams["src_vocab_size"],
+                (iter_num * batch_size, trg_seq_len)).astype("int64"),
+            "trg_sequence_length": np.random.randint(
+                1, trg_seq_len, (iter_num * batch_size, )).astype("int64"),
+            "label": np.random.randint(
+                2, self.model_hparams["src_vocab_size"],
+                (iter_num * batch_size, trg_seq_len, 1)).astype("int64"),
+        }
+
+        place = core.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else core.CPUPlace()
+        self.exe = Executor(place)
+
+    def test_seq2seq_model(self):
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            cost = def_seq2seq_model(**self.model_hparams)
+            self.exe.run(startup_program)
+            for iter_idx in range(self.iter_num):
+                cost_val = self.exe.run(
+                    feed={
+                        "src": self.data["src"][
+                            iter_idx * self.batch_size:
+                            (iter_idx + 1) * self.batch_size, :],
+                        "src_sequence_length": self.data[
+                            "src_sequence_length"][
+                                iter_idx * self.batch_size:
+                                (iter_idx + 1) * self.batch_size],
+                        "trg": self.data["trg"][
+                            iter_idx * self.batch_size:
+                            (iter_idx + 1) * self.batch_size, :],
+                        "trg_sequence_length": self.data[
+                            "trg_sequence_length"][
+                                iter_idx * self.batch_size:
+                                (iter_idx + 1) * self.batch_size],
+                        "label": self.data["label"][
+                            iter_idx * self.batch_size:
+                            (iter_idx + 1) * self.batch_size]
+                    },
+                    fetch_list=[cost])[0]
+                print("iter_idx: %d, cost: %f" % (iter_idx, cost_val))
+
+
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab
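
For readers tracing the one-line backward.py change above: before the patch, the continue sat outside the "if op_desc.type() not in ['rnn_memory_helper_grad']" check, so every op in that branch skipped the "create new gradient variables" step, including rnn_memory_helper_grad, whose output gradient variables were therefore never created. Moving the continue inside the check lets rnn_memory_helper_grad fall through to the variable-creation code while other ops are still pruned. Below is a minimal, self-contained sketch of that control flow, not the actual _append_backward_vars_ implementation; the op records and helper function are hypothetical stand-ins for illustration only.

# Hypothetical sketch of the control-flow difference; op records and names
# below are illustrative stand-ins, not Paddle's OpDesc API.
ops = [
    {"type": "mul_grad", "grad_inputs_exist": False, "outputs": ["x@GRAD"]},
    {"type": "rnn_memory_helper_grad", "grad_inputs_exist": False,
     "outputs": ["mem@GRAD"]},
]


def collect_output_vars(ops, continue_inside_check):
    """Mimic the pruning loop: decide which ops get output grad vars created."""
    created, removed = set(), []
    for idx, op in enumerate(ops):
        if not op["grad_inputs_exist"]:
            if op["type"] not in ["rnn_memory_helper_grad"]:
                removed.append(idx)
                if continue_inside_check:  # patched placement: skip pruned ops only
                    continue
            if not continue_inside_check:  # old placement: skip the whole branch
                continue
        # corresponds to the "create new gradient variables" step
        created.update(op["outputs"])
    return created, removed


print(collect_output_vars(ops, continue_inside_check=False))  # mem@GRAD left out
print(collect_output_vars(ops, continue_inside_check=True))   # mem@GRAD created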