From 0c71c839ec6af875e950c3413522ea0503d925f2 Mon Sep 17 00:00:00 2001
From: zhaoyuchen2018 <45989343+zhaoyuchen2018@users.noreply.github.com>
Date: Thu, 15 Aug 2019 19:57:55 +0800
Subject: [PATCH] Fix recurrent op not updating grad issue (#18581)

* Fix recurrent op failures

For a variable used in the outer block, copy the sub-block's grad
variable to the outer block.

test=develop
Signed-off-by: zhaoyuchen

* Fix unicode error

test=develop
Signed-off-by: zhaoyuchen

* Refine test code

test=develop
Signed-off-by: zhaoyuchen

* Fix seq2seq case failures

test=develop
Signed-off-by: zhaoyuchen

* Remove unreasonable code

test=develop
Signed-off-by: zhaoyuchen

* Refine code according to comments

test=develop
Signed-off-by: zhaoyuchen
---
 python/paddle/fluid/layers/control_flow.py    |   2 +-
 .../tests/unittests/test_recurrent_op.py      | 138 ++++++++++++++++++
 2 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 80a2167b8f..812603ba14 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -576,7 +576,7 @@ class StaticRNN(object):
             if in_var_name not in local_inputs:
                 params.append(in_var_name)
 
-        parameters = [parent_block.var(name) for name in params]
+        parameters = [parent_block.var(name) for name in set(params)]
 
         step_scope = parent_block.create_var(
             type=core.VarDesc.VarType.STEP_SCOPES)
diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py
index 02a8f38514..8354f6e65b 100644
--- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py
+++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py
@@ -464,5 +464,143 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1):
         return rnn()
 
 
+class RecurrentOpSubBlockTest(RecurrentOpTest1):
+    '''
+    Test RNNOp with subblock variable
+    equation:
+        y_ = emb * w1
+        h_t = \concat([x, h_{t-1}])
+        h_t = h_t * w2
+        h_t = \\unsqueeze(h_t, 1)
+        h_t = \dot_attention(h_t, y_)
+        h_t = \squeeze(h_t, 1)
+        y = h_t
+    vars:
+        - x
+        - w1
+        - w2
+    memories:
+        - h
+    outputs:
+        - y
+    '''
+
+    class PySimpleRNN5(PyRNNBase):
+        def __init__(self, input_shape, output_shape):
+            super(RecurrentOpSubBlockTest.PySimpleRNN5, self).__init__(
+                input_shape, output_shape)
+
+            seq_len, batch_size, input_dim = input_shape
+            self.w1 = np.random.uniform(
+                -0.1, 0.1, size=(input_dim, input_dim)).astype("float32")
+            self.w2 = np.random.uniform(
+                -0.1, 0.1, size=(input_dim * 2, input_dim)).astype("float32")
+
+            self.emb = np.random.uniform(
+                -0.1, 0.1, size=(seq_len, batch_size,
+                                 input_dim)).astype("float32")
+
+            men_dim = (seq_len, batch_size, input_dim)
+            self.mems = np.zeros(shape=men_dim).astype("float32")
+            self.oy = np.matmul(self.emb, self.w1)
+
+        def step(self, step_id, x):
+            def dot_attention(query, memory):
+                attn = np.matmul(query, memory.transpose((0, 2, 1)))
+                weight = softmax(attn)
+                weight_memory = np.matmul(weight, memory)
+                return weight_memory, weight
+
+            def softmax(x):
+                return np.exp(x) / sum(np.exp(x))
+
+            if step_id == 0:
+                pre_mem = np.zeros_like(x)
+            else:
+                pre_mem = self.mems[step_id - 1]
+            concat_in = np.concatenate([x, pre_mem], 1)
+            new_mem = np.matmul(concat_in, self.w2)
+
+            new_mem = np.expand_dims(new_mem, 1)
+            new_mem, _ = dot_attention(new_mem, self.oy)
+            new_mem = np.squeeze(new_mem, 1)
+
+            self.mems[step_id] = new_mem
+            self.y[step_id] = self.mems[step_id]
+
+    input_dim = 2
+    batch_size = 3
+    sent_len = 3
+
+    def setUp(self):
+        self.setup_program()
+
+        self.data_field = {"x", "emb", "w1", "w2"}
+
+        self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
+        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
+        self.py_rnn = RecurrentOpSubBlockTest.PySimpleRNN5(self.input_shape,
+                                                           self.output_shape)
+
+        with fluid.program_guard(self.main_program, self.startup_program):
+            rnn_out = self.create_rnn_op()
+            self.output = layers.mean(rnn_out)
+
+    def create_rnn_op(self):
+        x = layers.data(
+            shape=[self.sent_len, self.batch_size, self.input_dim],
+            dtype='float32',
+            name='x',
+            append_batch_size=False)
+        x.stop_gradient = False
+
+        emb = layers.data(
+            name='emb',
+            shape=[self.sent_len, self.batch_size, self.input_dim],
+            dtype='float32',
+            append_batch_size=False)
+        emb.stop_gradient = False
+
+        w1 = layers.data(
+            shape=[self.input_dim, self.input_dim],
+            dtype='float32',
+            name='w1',
+            append_batch_size=False)
+        w1.stop_gradient = False
+        w2 = layers.data(
+            shape=[self.input_dim * 2, self.input_dim],
+            dtype='float32',
+            name='w2',
+            append_batch_size=False)
+        w2.stop_gradient = False
+
+        rnn = layers.StaticRNN()
+
+        def dot_attention(query, memory):
+            attn = layers.matmul(query, memory, transpose_y=True)
+            weight = layers.softmax(attn)
+            weight_memory = layers.matmul(weight, memory)
+
+            return weight_memory, weight
+
+        y = layers.matmul(emb, w1)
+        with rnn.step():
+            pre_h = rnn.memory(
+                shape=(self.sent_len, self.input_dim),
+                batch_ref=x,
+                init_value=0.0)
+            step_in = rnn.step_input(x)
+            concat_in = layers.concat([step_in, pre_h], 1)
+            new_h = layers.matmul(concat_in, w2)
+            new_h = layers.unsqueeze(new_h, [1])
+            new_h, _ = dot_attention(new_h, y)
+            new_h = layers.squeeze(new_h, [1])
+
+            rnn.update_memory(pre_h, new_h)
+            rnn.step_output(new_h)
+
+        return rnn()
+
+
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab
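
For context on the one-line change to control_flow.py: the list comprehension now
iterates over set(params), so a parent-block variable name that was appended more
than once (e.g. when the same outer-block variable is read by several ops inside the
StaticRNN step block) is looked up via parent_block.var() only once. The stand-alone
sketch below is plain Python with made-up names, not code from this patch; it only
illustrates that deduplication.

    # Hypothetical illustration of the fix: deduplicate outer-block variable
    # names before turning them into the recurrent op's parameter list.
    params = ["emb", "w1", "w1", "w2"]  # "w1" is read by two step-block ops

    parameters_before = [name for name in params]       # duplicate "w1" entry
    parameters_after = [name for name in set(params)]   # one entry per variable

    print(sorted(parameters_before))  # ['emb', 'w1', 'w1', 'w2']
    print(sorted(parameters_after))   # ['emb', 'w1', 'w2']

One side effect of set() is that the resulting parameter order is no longer tied to the
order in which the variables are referenced; the assumption here is that the recurrent
op does not depend on that order, which the patch itself does not state explicitly.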