remove paddle.fluid.contrib.layers.BasicLSTMUnit、basic_lstm、BasicGRUUnit、basic_gru (#49268)

* rm paddle.fluid.contrib.layers.BasicLSTMUnit, basic_lstm, BasicGRUUnit, basic_gru
* rm dependency in __init__.py

remove paddle.fluid.contrib.layers.BasicLSTMUnit、basic_lstm、BasicGRUUnit、basic_gru (#49268)
* rm paddle.fluid.contrib.layers.BasicLSTMUnit, basic_lstm, BasicGRUUnit, basic_gru
* rm dependency in __init__.py
a1319074 · zqw_1997 · GitHub · cb34ee0f · a1319074 · cb34ee0f
12 changed files
--- a/python/paddle/fluid/contrib/layers/__init__.py
+++ b/python/paddle/fluid/contrib/layers/__init__.py
@@ -15,11 +15,9 @@
 from . import nn
 from .nn import *

-from .rnn_impl import *
 from . import metric_op
 from .metric_op import *

 __all__ = []
 __all__ += nn.__all__
-__all__ += rnn_impl.__all__
 __all__ += metric_op.__all__
--- a/python/paddle/fluid/contrib/layers/rnn_impl.py
+++ b/python/paddle/fluid/contrib/layers/rnn_impl.py
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -444,10 +444,6 @@ list(REMOVE_ITEM TEST_OPS
 list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model)
 list(REMOVE_ITEM TEST_OPS test_async_ssa_graph_executor_mnist)
 list(REMOVE_ITEM TEST_OPS test_install_check)
-list(REMOVE_ITEM TEST_OPS test_basic_gru_api)
-list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op)
-list(REMOVE_ITEM TEST_OPS test_basic_lstm_api)
-list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op)
 list(REMOVE_ITEM TEST_OPS test_fuse_all_reduce_pass)
 list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass)
 list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass)

--- a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.layers as layers
-from paddle.fluid import framework
-from paddle.fluid.contrib.layers import basic_gru
-from paddle.fluid.executor import Executor
-
-np.set_seed(123)
-
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1.0 / (1.0 + np.exp(-y))
-
-
-def tanh(x):
-    y = -2.0 * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2.0 / (1.0 + np.exp(y))) - 1.0
-
-
-def gru_np(
-    input,
-    init_h,
-    hidden_size,
-    gate_weight,
-    gate_bias,
-    candidate_weight,
-    candidate_bias,
-    num_layers=1,
-    batch_first=False,
-    is_bidirect=False,
-    sequence_length=None,
-):
-    def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
-        concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-        gate_input = np.matmul(concat_1, gate_w)
-        gate_input += gate_b
-        gate_input = sigmoid(gate_input)
-        r, u = np.split(gate_input, indices_or_sections=2, axis=1)
-
-        r_hidden = r * pre_hidden
-
-        candidate = np.matmul(
-            np.concatenate([step_in, r_hidden], 1), candidate_w
-        )
-
-        candidate += candidate_b
-        c = tanh(candidate)
-
-        new_hidden = u * pre_hidden + (1 - u) * c
-
-        return new_hidden
-
-    if batch_first:
-        input = np.tranpose(input, [1, 0, 2])
-
-    batch_size = input.shape[1]
-    mask = None
-    if sequence_length is not None:
-        max_seq_len = input.shape[0]
-
-        mask = np.zeros([batch_size, max_seq_len])
-
-        for i, len in enumerate(sequence_length):
-            mask[i, :len] = 1.0
-
-        mask = np.transpose(mask, [1, 0])
-
-    direc_num = 1
-    if is_bidirect:
-        direc_num = 2
-    if init_h:
-        init_h = np.reshape(
-            init_h, shape=[num_layers, direc_num, -1, hidden_size]
-        )
-    else:
-        init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size])
-
-    def get_single_direction_output(rnn_input, mask=None, direc_index=0):
-        seq_len = rnn_input.shape[0]
-
-        output = []
-        # init pre hidden
-        pre_hidden_array = []
-        for i in range(num_layers):
-            pre_hidden_array.append(init_h[i, direc_index])
-
-        for i in range(seq_len):
-            step_input = rnn_input[i]
-
-            if mask is not None:
-                step_mask = mask[i]
-                step_mask = np.reshape(step_mask, [-1, 1])
-
-            for i in range(num_layers):
-                new_hidden = step(
-                    step_input,
-                    pre_hidden_array[i],
-                    gate_weight[direc_index * num_layers + i],
-                    gate_bias[direc_index * num_layers + i],
-                    candidate_weight[direc_index * num_layers + i],
-                    candidate_bias[direc_index * num_layers + i],
-                )
-
-                if mask is not None:
-                    new_hidden = (
-                        new_hidden * step_mask
-                        + (1 - step_mask) * pre_hidden_array[i]
-                    )
-
-                pre_hidden_array[i] = new_hidden
-
-                step_input = new_hidden
-            output.append(step_input)
-        rnn_out = np.concatenate(output, 0)
-        rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size])
-
-        last_hidden_out = np.concatenate(pre_hidden_array, 0)
-        last_hidden_out = np.reshape(
-            last_hidden_out, [num_layers, -1, hidden_size]
-        )
-
-        return rnn_out, last_hidden_out
-
-    fw_rnn_out, fw_last_hidden = get_single_direction_output(
-        input, mask, direc_index=0
-    )
-
-    if is_bidirect:
-        bw_input = input[::-1]
-        bw_mask = None
-        if mask is not None:
-            bw_mask = mask[::-1]
-
-        bw_rnn_out, bw_last_hidden = get_single_direction_output(
-            bw_input, bw_mask, direc_index=1
-        )
-
-        bw_rnn_out = bw_rnn_out[::-1]
-
-        rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2)
-        last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1)
-        last_hidden = np.reshape(
-            last_hidden, [num_layers * direc_num, -1, hidden_size]
-        )
-
-        if batch_first:
-            rnn_out = np.transpose(rnn_out, [1, 0, 2])
-
-        return rnn_out, last_hidden
-    else:
-        rnn_out = fw_rnn_out
-        last_hidden = fw_last_hidden
-
-        if batch_first:
-            rnn_out = np.transpose(rnn_out, [1, 0, 2])
-
-        return rnn_out, last_hidden
-
-
-class TestBasicGRUApi(unittest.TestCase):
-    def setUp(self):
-        self.hidden_size = 10
-        self.batch_size = 5
-        self.seq_len = 6
-        self.num_layers = 2
-        self.is_bidirect = True
-        self.batch_first = False
-
-    def test_run(self):
-        x = layers.data(
-            name='x',
-            shape=[-1, self.batch_size, self.hidden_size],
-            dtype='float32',
-        )
-        sequence_length = layers.data(
-            name="sequence_length", shape=[-1], dtype='float32'
-        )
-
-        rnn_out, last_hidden = basic_gru(
-            x,
-            None,
-            self.hidden_size,
-            num_layers=self.num_layers,
-            batch_first=self.batch_first,
-            bidirectional=self.is_bidirect,
-            sequence_length=sequence_length,
-        )
-
-        last_hidden.persisbale = True
-        rnn_out.persisbale = True
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        param_list = fluid.default_main_program().block(0).all_parameters()
-
-        # process weight and bias
-        gate_weight = []
-        gate_bias = []
-        candidate_weight = []
-        candidate_bias = []
-
-        for i in range(self.num_layers):
-            gate_w_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_0"
-            gate_b_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_0"
-            candidate_w_name = (
-                "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_1"
-            )
-            candidate_b_name = (
-                "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_1"
-            )
-
-            gate_w = np.array(
-                fluid.global_scope().find_var(gate_w_name).get_tensor()
-            )
-            gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                'float32'
-            )
-            fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-                gate_w, place
-            )
-
-            gate_b = np.array(
-                fluid.global_scope().find_var(gate_b_name).get_tensor()
-            )
-            gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                'float32'
-            )
-            fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-                gate_b, place
-            )
-
-            candidate_w = np.array(
-                fluid.global_scope().find_var(candidate_w_name).get_tensor()
-            )
-            candidate_w = np.random.uniform(
-                -0.1, 0.1, size=candidate_w.shape
-            ).astype('float32')
-            fluid.global_scope().find_var(candidate_w_name).get_tensor().set(
-                candidate_w, place
-            )
-
-            candidate_b = np.array(
-                fluid.global_scope().find_var(candidate_b_name).get_tensor()
-            )
-            candidate_b = np.random.uniform(
-                -0.1, 0.1, size=candidate_b.shape
-            ).astype('float32')
-            fluid.global_scope().find_var(candidate_b_name).get_tensor().set(
-                candidate_b, place
-            )
-
-            gate_weight.append(gate_w)
-            gate_bias.append(gate_b)
-            candidate_weight.append(candidate_w)
-            candidate_bias.append(candidate_b)
-
-        if self.is_bidirect:
-            for i in range(self.num_layers):
-                gate_w_name = (
-                    "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_0"
-                )
-                gate_b_name = (
-                    "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_0"
-                )
-                candidate_w_name = (
-                    "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_1"
-                )
-                candidate_b_name = (
-                    "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_1"
-                )
-
-                gate_w = np.array(
-                    fluid.global_scope().find_var(gate_w_name).get_tensor()
-                )
-                gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                    'float32'
-                )
-                fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-                    gate_w, place
-                )
-
-                gate_b = np.array(
-                    fluid.global_scope().find_var(gate_b_name).get_tensor()
-                )
-                gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                    'float32'
-                )
-                fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-                    gate_b, place
-                )
-
-                candidate_w = np.array(
-                    fluid.global_scope().find_var(candidate_w_name).get_tensor()
-                )
-                candidate_w = np.random.uniform(
-                    -0.1, 0.1, size=candidate_w.shape
-                ).astype('float32')
-                fluid.global_scope().find_var(
-                    candidate_w_name
-                ).get_tensor().set(candidate_w, place)
-
-                candidate_b = np.array(
-                    fluid.global_scope().find_var(candidate_b_name).get_tensor()
-                )
-                candidate_b = np.random.uniform(
-                    -0.1, 0.1, size=candidate_b.shape
-                ).astype('float32')
-                fluid.global_scope().find_var(
-                    candidate_b_name
-                ).get_tensor().set(candidate_b, place)
-
-                gate_weight.append(gate_w)
-                gate_bias.append(gate_b)
-                candidate_weight.append(candidate_w)
-                candidate_bias.append(candidate_b)
-
-        step_input_np = np.random.uniform(
-            -0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size)
-        ).astype('float32')
-        sequence_length_np = np.random.randint(
-            self.seq_len // 2, self.seq_len, size=(self.batch_size)
-        ).astype('int64')
-
-        out = exe.run(
-            feed={'x': step_input_np, 'sequence_length': sequence_length_np},
-            fetch_list=[rnn_out, last_hidden],
-        )
-
-        api_rnn_out = out[0]
-        api_last_hidden = out[1]
-
-        np_out = gru_np(
-            step_input_np,
-            None,
-            self.hidden_size,
-            gate_weight,
-            gate_bias,
-            candidate_weight,
-            candidate_bias,
-            num_layers=self.num_layers,
-            batch_first=self.batch_first,
-            is_bidirect=self.is_bidirect,
-            sequence_length=sequence_length_np,
-        )
-
-        np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0)
-
-        np.testing.assert_allclose(
-            api_last_hidden, np_out[1], rtol=0.0001, atol=0
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.layers as layers
-from paddle.fluid import framework
-from paddle.fluid.contrib.layers import BasicGRUUnit
-from paddle.fluid.executor import Executor
-
-np.set_seed(123)
-
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1.0 / (1.0 + np.exp(-y))
-
-
-def tanh(x):
-    y = -2.0 * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2.0 / (1.0 + np.exp(y))) - 1.0
-
-
-def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
-    concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-    gate_input = np.matmul(concat_1, gate_w)
-    gate_input += gate_b
-    gate_input = sigmoid(gate_input)
-    r, u = np.split(gate_input, indices_or_sections=2, axis=1)
-
-    r_hidden = r * pre_hidden
-
-    candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
-
-    candidate += candidate_b
-    c = tanh(candidate)
-
-    new_hidden = u * pre_hidden + (1 - u) * c
-
-    return new_hidden
-
-
-class TestBasicGRUUnit(unittest.TestCase):
-    def setUp(self):
-        self.hidden_size = 5
-        self.batch_size = 5
-
-    def test_run(self):
-        x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32')
-        pre_hidden = layers.data(
-            name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32'
-        )
-        gru_unit = BasicGRUUnit("gru_unit", self.hidden_size)
-
-        new_hidden = gru_unit(x, pre_hidden)
-
-        new_hidden.persisbale = True
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        param_list = fluid.default_main_program().block(0).all_parameters()
-
-        # process weight and bias
-
-        gate_w_name = "gru_unit/BasicGRUUnit_0.w_0"
-        gate_b_name = "gru_unit/BasicGRUUnit_0.b_0"
-        candidate_w_name = "gru_unit/BasicGRUUnit_0.w_1"
-        candidate_b_name = "gru_unit/BasicGRUUnit_0.b_1"
-
-        gate_w = np.array(
-            fluid.global_scope().find_var(gate_w_name).get_tensor()
-        )
-        gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-            'float32'
-        )
-        fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-            gate_w, place
-        )
-
-        gate_b = np.array(
-            fluid.global_scope().find_var(gate_b_name).get_tensor()
-        )
-        gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-            'float32'
-        )
-        fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-            gate_b, place
-        )
-
-        candidate_w = np.array(
-            fluid.global_scope().find_var(candidate_w_name).get_tensor()
-        )
-        candidate_w = np.random.uniform(
-            -0.1, 0.1, size=candidate_w.shape
-        ).astype('float32')
-        fluid.global_scope().find_var(candidate_w_name).get_tensor().set(
-            candidate_w, place
-        )
-
-        candidate_b = np.array(
-            fluid.global_scope().find_var(candidate_b_name).get_tensor()
-        )
-        candidate_b = np.random.uniform(
-            -0.1, 0.1, size=candidate_b.shape
-        ).astype('float32')
-        fluid.global_scope().find_var(candidate_b_name).get_tensor().set(
-            candidate_b, place
-        )
-
-        step_input_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-        pre_hidden_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-
-        out = exe.run(
-            feed={'x': step_input_np, 'pre_hidden': pre_hidden_np},
-            fetch_list=[new_hidden],
-        )
-
-        api_out = out[0]
-
-        np_out = step(
-            step_input_np,
-            pre_hidden_np,
-            gate_w,
-            gate_b,
-            candidate_w,
-            candidate_b,
-        )
-
-        np.testing.assert_allclose(api_out, np_out, rtol=0.0001, atol=0)
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.layers as layers
-from paddle.fluid import framework
-from paddle.fluid.contrib.layers import basic_lstm
-from paddle.fluid.executor import Executor
-
-np.set_seed(123)
-
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1.0 / (1.0 + np.exp(-y))
-
-
-def tanh(x):
-    y = -2.0 * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2.0 / (1.0 + np.exp(y))) - 1.0
-
-
-def lstm_np(
-    input,
-    init_h,
-    init_c,
-    hidden_size,
-    gate_weight,
-    gate_bias,
-    num_layers=1,
-    batch_first=False,
-    is_bidirect=False,
-    sequence_length=None,
-    forget_bias=1.0,
-):
-    def step(step_in, pre_hidden, pre_cell, gate_w, gate_b):
-        concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-        gate_input = np.matmul(concat_1, gate_w)
-        gate_input += gate_b
-        i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
-
-        new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
-        new_hidden = tanh(new_cell) * sigmoid(o)
-
-        return new_hidden, new_cell
-
-    mask = None
-
-    if batch_first:
-        input = np.tranpose(input, [1, 0, 2])
-        if mask is not None:
-            mask = np.transpose(mask, [1, 0])
-
-    batch_size = input.shape[1]
-    if sequence_length is not None:
-        max_seq_len = input.shape[0]
-
-        mask = np.zeros([batch_size, max_seq_len])
-
-        for i, len in enumerate(sequence_length):
-            mask[i, :len] = 1.0
-
-        mask = np.transpose(mask, [1, 0])
-
-    direc_num = 1
-    if is_bidirect:
-        direc_num = 2
-    if init_h:
-        init_h = np.reshape(init_h, [num_layers, direc_num, -1, hidden_size])
-        init_c = np.reshape(init_c, [num_layers, direc_num, -1, hidden_size])
-    else:
-        init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size])
-        init_c = np.zeros([num_layers, direc_num, batch_size, hidden_size])
-
-    def get_single_direction_output(rnn_input, mask=None, direc_index=0):
-        seq_len = rnn_input.shape[0]
-
-        output = []
-        # init pre hidden
-        pre_hidden_array = []
-        pre_cell_array = []
-        for i in range(num_layers):
-            pre_hidden_array.append(init_h[i, direc_index])
-            pre_cell_array.append(init_c[i, direc_index])
-
-        for i in range(seq_len):
-            step_input = rnn_input[i]
-
-            if mask is not None:
-                step_mask = mask[i]
-                step_mask = np.reshape(step_mask, [-1, 1])
-                # print("np mask", step_mask.shape  )
-
-            for i in range(num_layers):
-                new_hidden, new_cell = step(
-                    step_input,
-                    pre_hidden_array[i],
-                    pre_cell_array[i],
-                    gate_weight[direc_index * num_layers + i],
-                    gate_bias[direc_index * num_layers + i],
-                )
-
-                if mask is not None:
-
-                    new_hidden = np.multiply(
-                        new_hidden, step_mask
-                    ) - np.multiply(pre_hidden_array[i], (step_mask - 1.0))
-                    # new_hidden = new_hidden * step_mask - pre_hidden_array[i] * ( step_mask -1 )
-                    # new_cell = new_cell * step_mask - pre_cell_array[i] * (step_mask -1)
-                    new_cell = np.multiply(new_cell, step_mask) - np.multiply(
-                        pre_cell_array[i], (step_mask - 1.0)
-                    )
-
-                pre_hidden_array[i] = new_hidden
-                pre_cell_array[i] = new_cell
-
-                step_input = new_hidden
-            output.append(step_input)
-        rnn_out = np.concatenate(output, 0)
-        rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size])
-
-        last_hidden_out = np.concatenate(pre_hidden_array, 0)
-        last_hidden_out = np.reshape(
-            last_hidden_out, [num_layers, -1, hidden_size]
-        )
-
-        last_cell_out = np.concatenate(pre_cell_array, 0)
-        last_cell_out = np.reshape(last_cell_out, [num_layers, -1, hidden_size])
-
-        return rnn_out, last_hidden_out, last_cell_out
-
-    fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output(
-        input, mask, direc_index=0
-    )
-
-    if is_bidirect:
-        bw_input = input[::-1]
-        bw_mask = None
-        if mask is not None:
-            bw_mask = mask[::-1]
-
-        bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output(
-            bw_input, bw_mask, direc_index=1
-        )
-
-        bw_rnn_out = bw_rnn_out[::-1]
-
-        rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2)
-        last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1)
-        last_hidden = np.reshape(
-            last_hidden, [num_layers * direc_num, -1, hidden_size]
-        )
-
-        last_cell = np.concatenate([fw_last_cell, bw_last_cell], 1)
-        last_cell = np.reshape(
-            last_cell, [num_layers * direc_num, -1, hidden_size]
-        )
-
-        if batch_first:
-            rnn_out = np.transpose(rnn_out, [1, 0, 2])
-
-        return rnn_out, last_hidden, last_cell
-    else:
-        rnn_out = fw_rnn_out
-        last_hidden = fw_last_hidden
-        last_cell = fw_last_cell
-
-        if batch_first:
-            rnn_out = np.transpose(rnn_out, [1, 0, 2])
-
-        return rnn_out, last_hidden, last_cell
-
-
-class TestBasicLSTMApi(unittest.TestCase):
-    def setUp(self):
-        self.hidden_size = 10
-        self.batch_size = 5
-        self.seq_len = 6
-        self.num_layers = 2
-        self.is_bidirect = True
-        self.batch_first = False
-        self.forget_bias = 1.0
-
-    def test_run(self):
-        x = layers.data(
-            name='x',
-            shape=[-1, self.batch_size, self.hidden_size],
-            dtype='float32',
-        )
-        sequence_length = layers.data(
-            name="sequence_length", shape=[-1], dtype='float32'
-        )
-
-        rnn_out, last_hidden, last_cell = basic_lstm(
-            x,
-            None,
-            None,
-            self.hidden_size,
-            num_layers=self.num_layers,
-            batch_first=self.batch_first,
-            bidirectional=self.is_bidirect,
-            sequence_length=sequence_length,
-            forget_bias=self.forget_bias,
-        )
-
-        last_hidden.persisbale = True
-        rnn_out.persisbale = True
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        param_list = fluid.default_main_program().block(0).all_parameters()
-
-        # process weight and bias
-        gate_weight = []
-        gate_bias = []
-
-        for i in range(self.num_layers):
-            gate_w_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.w_0"
-            gate_b_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.b_0"
-
-            gate_w = np.array(
-                fluid.global_scope().find_var(gate_w_name).get_tensor()
-            )
-            gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                'float32'
-            )
-            fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-                gate_w, place
-            )
-
-            gate_b = np.array(
-                fluid.global_scope().find_var(gate_b_name).get_tensor()
-            )
-            gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                'float32'
-            )
-            fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-                gate_b, place
-            )
-
-            gate_weight.append(gate_w)
-            gate_bias.append(gate_b)
-
-        if self.is_bidirect:
-            for i in range(self.num_layers):
-                gate_w_name = (
-                    "basic_lstm_reverse_layers_"
-                    + str(i)
-                    + "/BasicLSTMUnit_0.w_0"
-                )
-                gate_b_name = (
-                    "basic_lstm_reverse_layers_"
-                    + str(i)
-                    + "/BasicLSTMUnit_0.b_0"
-                )
-
-                gate_w = np.array(
-                    fluid.global_scope().find_var(gate_w_name).get_tensor()
-                )
-                gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                    'float32'
-                )
-                fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-                    gate_w, place
-                )
-
-                gate_b = np.array(
-                    fluid.global_scope().find_var(gate_b_name).get_tensor()
-                )
-                gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                    'float32'
-                )
-                fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-                    gate_b, place
-                )
-
-                gate_weight.append(gate_w)
-                gate_bias.append(gate_b)
-
-        step_input_np = np.random.uniform(
-            -0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size)
-        ).astype('float32')
-        sequence_length_np = np.random.randint(
-            self.seq_len // 2, self.seq_len, size=(self.batch_size)
-        ).astype('int64')
-
-        out = exe.run(
-            feed={'x': step_input_np, 'sequence_length': sequence_length_np},
-            fetch_list=[rnn_out, last_hidden, last_cell],
-        )
-
-        api_rnn_out = out[0]
-        api_last_hidden = out[1]
-        api_last_cell = out[2]
-
-        np_out = lstm_np(
-            step_input_np,
-            None,
-            None,
-            self.hidden_size,
-            gate_weight,
-            gate_bias,
-            num_layers=self.num_layers,
-            batch_first=self.batch_first,
-            is_bidirect=self.is_bidirect,
-            sequence_length=sequence_length_np,
-        )
-
-        np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0)
-        np.testing.assert_allclose(
-            api_last_hidden, np_out[1], rtol=0.0001, atol=0
-        )
-        np.testing.assert_allclose(
-            api_last_cell, np_out[2], rtol=0.0001, atol=0
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.layers as layers
-from paddle.fluid import framework
-from paddle.fluid.contrib.layers import BasicLSTMUnit
-from paddle.fluid.executor import Executor
-
-np.set_seed(123)
-
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1.0 / (1.0 + np.exp(-y))
-
-
-def tanh(x):
-    y = -2.0 * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2.0 / (1.0 + np.exp(y))) - 1.0
-
-
-def step(step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0):
-    concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-    gate_input = np.matmul(concat_1, gate_w)
-    gate_input += gate_b
-    i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
-
-    new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
-    new_hidden = tanh(new_cell) * sigmoid(o)
-
-    return new_hidden, new_cell
-
-
-class TestBasicGRUUnit(unittest.TestCase):
-    def setUp(self):
-        self.hidden_size = 5
-        self.batch_size = 5
-
-    def test_run(self):
-        x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32')
-        pre_hidden = layers.data(
-            name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32'
-        )
-        pre_cell = layers.data(
-            name="pre_cell", shape=[-1, self.hidden_size], dtype='float32'
-        )
-
-        lstm_unit = BasicLSTMUnit("lstm_unit", self.hidden_size)
-
-        new_hidden, new_cell = lstm_unit(x, pre_hidden, pre_cell)
-
-        new_hidden.persisbale = True
-        new_cell.persisbale = True
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        param_list = fluid.default_main_program().block(0).all_parameters()
-
-        # process weight and bias
-
-        gate_w_name = "lstm_unit/BasicLSTMUnit_0.w_0"
-        gate_b_name = "lstm_unit/BasicLSTMUnit_0.b_0"
-
-        gate_w = np.array(
-            fluid.global_scope().find_var(gate_w_name).get_tensor()
-        )
-        gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-            'float32'
-        )
-        fluid.global_scope().find_var(gate_w_name).get_tensor().set(
-            gate_w, place
-        )
-
-        gate_b = np.array(
-            fluid.global_scope().find_var(gate_b_name).get_tensor()
-        )
-        gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-            'float32'
-        )
-        fluid.global_scope().find_var(gate_b_name).get_tensor().set(
-            gate_b, place
-        )
-
-        step_input_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-        pre_hidden_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-        pre_cell_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-
-        out = exe.run(
-            feed={
-                'x': step_input_np,
-                'pre_hidden': pre_hidden_np,
-                'pre_cell': pre_cell_np,
-            },
-            fetch_list=[new_hidden, new_cell],
-        )
-
-        api_hidden_out = out[0]
-        api_cell_out = out[1]
-
-        np_hidden_out, np_cell_out = step(
-            step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b
-        )
-
-        np.testing.assert_allclose(
-            api_hidden_out, np_hidden_out, rtol=0.0001, atol=0
-        )
-        np.testing.assert_allclose(
-            api_cell_out, np_cell_out, rtol=0.0001, atol=0
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from test_imperative_base import new_program_scope
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from paddle.fluid.contrib.layers import basic_gru, basic_lstm
-
-
-class TestBasicGRUApiName(unittest.TestCase):
-    def setUp(self):
-        self.name_set = set(
-            [
-                "test1_fw_w_0_gate",
-                "test1_fw_w_0_candidate",
-                "test1_fw_b_0_gate",
-                "test1_fw_b_0_candidate",
-                "test1_bw_w_0_gate",
-                "test1_bw_w_0_candidate",
-                "test1_bw_b_0_gate",
-                "test1_bw_b_0_candidate",
-            ]
-        )
-
-    def test_name(self):
-        batch_size = 20
-        input_size = 128
-        hidden_size = 256
-        num_layers = 1
-        dropout = 0.5
-        bidirectional = True
-        batch_first = False
-
-        with new_program_scope():
-            input = layers.data(
-                name="input",
-                shape=[-1, batch_size, input_size],
-                dtype='float32',
-            )
-            pre_hidden = layers.data(
-                name="pre_hidden", shape=[-1, hidden_size], dtype='float32'
-            )
-            sequence_length = layers.data(
-                name="sequence_length", shape=[-1], dtype='int32'
-            )
-
-            rnn_out, last_hidden = basic_gru(
-                input,
-                pre_hidden,
-                hidden_size,
-                num_layers=num_layers,
-                sequence_length=sequence_length,
-                dropout_prob=dropout,
-                bidirectional=bidirectional,
-                batch_first=batch_first,
-                param_attr=fluid.ParamAttr(name="test1"),
-                bias_attr=fluid.ParamAttr(name="test1"),
-                name="basic_gru",
-            )
-
-            var_list = fluid.io.get_program_parameter(
-                fluid.default_main_program()
-            )
-
-            for var in var_list:
-                self.assertTrue(var.name in self.name_set)
-
-
-class TestBasicLSTMApiName(unittest.TestCase):
-    def setUp(self):
-        self.name_set = set(
-            [
-                "test1_fw_w_0",
-                "test1_fw_b_0",
-                "test1_fw_w_1",
-                "test1_fw_b_1",
-                "test1_bw_w_0",
-                "test1_bw_b_0",
-                "test1_bw_w_1",
-                "test1_bw_b_1",
-            ]
-        )
-
-    def test_name(self):
-        batch_size = 20
-        input_size = 128
-        hidden_size = 256
-        num_layers = 2
-        dropout = 0.5
-        bidirectional = True
-        batch_first = False
-
-        with new_program_scope():
-            input = layers.data(
-                name="input",
-                shape=[-1, batch_size, input_size],
-                dtype='float32',
-            )
-            pre_hidden = layers.data(
-                name="pre_hidden", shape=[-1, hidden_size], dtype='float32'
-            )
-            pre_cell = layers.data(
-                name="pre_cell", shape=[-1, hidden_size], dtype='float32'
-            )
-            sequence_length = layers.data(
-                name="sequence_length", shape=[-1], dtype='int32'
-            )
-
-            rnn_out, last_hidden, last_cell = basic_lstm(
-                input,
-                pre_hidden,
-                pre_cell,
-                hidden_size,
-                num_layers=num_layers,
-                sequence_length=sequence_length,
-                dropout_prob=dropout,
-                bidirectional=bidirectional,
-                param_attr=fluid.ParamAttr(name="test1"),
-                bias_attr=fluid.ParamAttr(name="test1"),
-                batch_first=batch_first,
-            )
-
-            var_list = fluid.io.get_program_parameter(
-                fluid.default_main_program()
-            )
-
-            for var in var_list:
-                self.assertTrue(var.name in self.name_set)
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
@@ -20,8 +20,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
-from paddle.fluid import ParamAttr
-from paddle.fluid.contrib.layers import basic_lstm
 from paddle.fluid.executor import Executor
 from paddle.fluid.layers.control_flow import StaticRNN as PaddingRNN

@@ -85,7 +83,7 @@ class RNNConfig:
        else:
            raise ValueError('Unsupported model_type.')

-        if rnn_model not in ('static', 'padding', 'cudnn', 'basic_lstm'):
+        if rnn_model not in ('static', 'padding', 'cudnn'):
            raise ValueError('Unsupported rnn_model.')

        self.batch_size = 12
@@ -406,23 +404,6 @@ def lm_model(
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape,
        )
-    elif rnn_model == "basic_lstm":
-        rnn_out, last_hidden, last_cell = basic_lstm(
-            x_emb,
-            init_hidden,
-            init_cell,
-            hidden_size,
-            num_layers=num_layers,
-            batch_first=True,
-            dropout_prob=dropout,
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.UniformInitializer(
-                    low=-init_scale, high=init_scale
-                )
-            ),
-            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
-            forget_bias=0.0,
-        )
    else:
        print("type not support")
        return

--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -2329,33 +2329,6 @@ class TestBook(LayerTest):
            )
            return output

-    def test_basic_gru(self):
-        input_size = 128
-        hidden_size = 256
-        with self.static_graph():
-            input = fluid.data(
-                name="input", shape=[None, None, input_size], dtype='float32'
-            )
-            pre_hidden = fluid.data(
-                name="pre_hidden", shape=[None, hidden_size], dtype='float32'
-            )
-            sequence_length = fluid.data(
-                name="sequence_length", shape=[None], dtype='int32'
-            )
-
-            for bidirectional in [True, False]:
-                for batch_first in [True, False]:
-                    rnn_out, last_hidden = fluid.contrib.layers.basic_gru(
-                        input,
-                        pre_hidden,
-                        hidden_size=256,
-                        num_layers=2,
-                        sequence_length=sequence_length,
-                        dropout_prob=0.5,
-                        bidirectional=bidirectional,
-                        batch_first=batch_first,
-                    )
-

 class ExampleNet(paddle.nn.Layer):
    def __init__(self):

--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -274,7 +274,6 @@ HIGH_PARALLEL_JOB_NEW = [
    'test_mkldnn_op_nhwc',
    'test_fc_act_mkldnn_fuse_pass',
    'test_fleet_base_3',
-    'test_basic_rnn_name',
    'test_query_op',
    'test_fleet_base_4',
    'save_load_op_test',
@@ -1980,7 +1979,6 @@ CPU_PARALLEL_JOB = [
    'test_beam_search_op',
    'test_batch_sampler',
    'test_batch_norm_act_fuse_pass',
-    'test_basic_rnn_name',
    'test_attention_lstm_op',
    'test_analyzer',
    'test_aligned_allocator',

--- a/tools/static_mode_white_list.py
+++ b/tools/static_mode_white_list.py
@@ -71,7 +71,6 @@ STATIC_MODE_TESTING_LIST = [
    'test_auc_single_pred_op',
    'test_avoid_twice_initialization',
    'test_backward',
-    'test_basic_rnn_name',
    'test_batch_norm_op',
    'test_batch_norm_op_v2',
    'test_bce_loss',