Unverified commit 4672ea8e, authored by 骑马小猫, committed by GitHub

[FluidAPI] remove fluid rnn apis (#49050)

* remove lstm api

* remove gru_unit api

* remove lstm in all

* remove beam-search

* remove beam_search slot

* remove lstm test code

* remove fluid.layers.nn api

* update gru-unit

* revert gru_unit white list
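The fluid RNN layers removed by this PR (fluid.layers.dynamic_lstm, dynamic_lstmp, dynamic_gru, gru_unit, and layers.lstm) have 2.x counterparts under paddle.nn. The following is a minimal migration sketch, not code taken from this PR, assuming Paddle 2.x in dynamic-graph mode; the shapes are illustrative only:

import paddle

batch_size, seq_len, emb_dim, hid_dim = 4, 10, 128, 128
# Batch-major input: [batch, time, feature].
x = paddle.randn([batch_size, seq_len, emb_dim])

# paddle.nn.LSTM stands in for fluid.layers.dynamic_lstm / fluid.layers.lstm.
# It returns per-step outputs plus the final (hidden, cell) states.
lstm = paddle.nn.LSTM(input_size=emb_dim, hidden_size=hid_dim, num_layers=2)
lstm_out, (last_h, last_c) = lstm(x)  # lstm_out: [batch, time, hid_dim]

# paddle.nn.GRU stands in for fluid.layers.dynamic_gru / fluid.layers.gru_unit.
gru = paddle.nn.GRU(input_size=emb_dim, hidden_size=hid_dim)
gru_out, gru_last_h = gru(x)  # gru_out: [batch, time, hid_dim]

# Max pooling over time (previously fluid.layers.sequence_pool with
# pool_type='max') can be expressed as a reduction over the time axis.
pooled = paddle.max(lstm_out, axis=1)  # [batch, hid_dim]

Note that the padded-tensor APIs above do not use LoD; tests that relied on LoDTensor inputs are removed rather than migrated in this PR.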
Parent 0c1cb5e3
This diff is collapsed.
......@@ -12,5 +12,4 @@ endforeach()
set_tests_properties(test_word2vec_book PROPERTIES TIMEOUT 120)
set_tests_properties(test_recognize_digits PROPERTIES TIMEOUT 120)
set_tests_properties(test_image_classification PROPERTIES TIMEOUT 200)
set_tests_properties(test_label_semantic_roles PROPERTIES TIMEOUT 240)
set_tests_properties(test_fit_a_line PROPERTIES TIMEOUT 120)
......@@ -55,43 +55,6 @@ def convolution_net(
return avg_cost, accuracy, prediction
def stacked_lstm_net(
data, label, input_dim, class_dim=2, emb_dim=128, hid_dim=512, stacked_num=3
):
assert stacked_num % 2 == 1
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True
)
# add bias attr
# TODO(qijun) linear act
fc1 = fluid.layers.fc(input=emb, size=hid_dim)
lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = fluid.layers.fc(input=inputs, size=hid_dim)
lstm, cell = fluid.layers.dynamic_lstm(
input=fc, size=hid_dim, is_reverse=(i % 2) == 0
)
inputs = [fc, lstm]
fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
prediction = fluid.layers.fc(
input=[fc_last, lstm_last], size=class_dim, act='softmax'
)
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_cost = paddle.mean(cost)
accuracy = paddle.static.accuracy(input=prediction, label=label)
return avg_cost, accuracy, prediction
def train(
word_dict,
net_method,
......@@ -278,25 +241,6 @@ class TestUnderstandSentiment(unittest.TestCase):
parallel=True,
)
@unittest.skip(reason="make CI faster")
def test_stacked_lstm_cpu(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=False,
save_dirname="understand_sentiment_stacked_lstm.inference.model",
)
def test_stacked_lstm_cpu_parallel(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=False,
parallel=True,
)
def test_conv_gpu(self):
with self.new_program_scope():
main(
......@@ -315,25 +259,6 @@ class TestUnderstandSentiment(unittest.TestCase):
parallel=True,
)
@unittest.skip(reason="make CI faster")
def test_stacked_lstm_gpu(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=True,
save_dirname="understand_sentiment_stacked_lstm.inference.model",
)
def test_stacked_lstm_gpu_parallel(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=True,
parallel=True,
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
import tempfile
import time
import unittest
import numpy as np
import paddle
import paddle.dataset.conll05 as conll05
import paddle.fluid as fluid
paddle.enable_static()
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_dict_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3
IS_SPARSE = True
PASS_NUM = 2
BATCH_SIZE = 10
embedding_name = 'emb'
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def db_lstm(
word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, **ignored
):
# 8 features
predicate_embedding = fluid.layers.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb',
)
mark_embedding = fluid.layers.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
is_sparse=IS_SPARSE,
)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
fluid.layers.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(name=embedding_name, trainable=False),
)
for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
lstm_0 = fluid.layers.dynamic_lstm(
input=hidden_0,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
)
# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = fluid.layers.sums(
input=[
fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
fluid.layers.fc(input=input_tmp[1], size=hidden_dim),
]
)
lstm = fluid.layers.dynamic_lstm(
input=mix_hidden,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1),
)
input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(
input=[
fluid.layers.fc(
input=input_tmp[0], size=label_dict_len, act='tanh'
),
fluid.layers.fc(
input=input_tmp[1], size=label_dict_len, act='tanh'
),
]
)
return feature_out
def train(use_cuda, save_dirname=None, is_local=True):
# define network topology
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1
)
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1
)
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1
)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1
)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1
)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1
)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1
)
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1
)
feature_out = db_lstm(**locals())
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1
)
cost = fluid.layers.softmax_with_cross_entropy(feature_out, target)
avg_cost = paddle.mean(cost)
# TODO(qiao)
# check other optimizers and check why the output becomes NaN
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=0.01,
decay_steps=100000,
decay_rate=0.5,
staircase=True,
)
)
sgd_optimizer.minimize(avg_cost)
# TODO(qiao)
# add dependency track and move this config before optimizer
train_data = paddle.batch(
paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE,
)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(
feed_list=[
word,
ctx_n2,
ctx_n1,
ctx_0,
ctx_p1,
ctx_p2,
predicate,
mark,
target,
],
place=place,
)
exe = fluid.Executor(place)
def train_loop(main_program):
exe.run(fluid.default_startup_program())
embedding_param = (
fluid.global_scope().find_var(embedding_name).get_tensor()
)
embedding_param.set(
load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
place,
)
start_time = time.time()
batch_id = 0
for pass_id in range(PASS_NUM):
for data in train_data():
cost = exe.run(
main_program, feed=feeder.feed(data), fetch_list=[avg_cost]
)
cost = cost[0]
if batch_id % 10 == 0:
print("avg_cost:" + str(cost))
if batch_id != 0:
print(
"second per batch: "
+ str((time.time() - start_time) / batch_id)
)
# Set the threshold low to speed up the CI test
if float(cost) < 80.0:
if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode
fluid.io.save_inference_model(
save_dirname,
[
'word_data',
'verb_data',
'ctx_n2_data',
'ctx_n1_data',
'ctx_0_data',
'ctx_p1_data',
'ctx_p2_data',
'mark_data',
],
[feature_out],
exe,
)
return
batch_id = batch_id + 1
raise RuntimeError(
"This model should save_inference_model and return, but not reach here, please check!"
)
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(
current_endpoint, pserver_prog
)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(save_dirname, exe)
# Setup input by creating LoDTensor to represent sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index to
# look up for the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for three sentences of
# length 3, 4 and 2, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
recursive_seq_lens = [[3, 4, 2]]
base_shape = [1]
# The range of random integers is [low, high]
word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
pred = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=pred_dict_len - 1
)
ctx_n2 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
ctx_n1 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
ctx_0 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
ctx_p1 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
ctx_p2 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
)
mark = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=mark_dict_len - 1
)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
assert feed_target_names[0] == 'word_data'
assert feed_target_names[1] == 'verb_data'
assert feed_target_names[2] == 'ctx_n2_data'
assert feed_target_names[3] == 'ctx_n1_data'
assert feed_target_names[4] == 'ctx_0_data'
assert feed_target_names[5] == 'ctx_p1_data'
assert feed_target_names[6] == 'ctx_p2_data'
assert feed_target_names[7] == 'mark_data'
results = exe.run(
inference_program,
feed={
feed_target_names[0]: word,
feed_target_names[1]: pred,
feed_target_names[2]: ctx_n2,
feed_target_names[3]: ctx_n1,
feed_target_names[4]: ctx_0,
feed_target_names[5]: ctx_p1,
feed_target_names[6]: ctx_p2,
feed_target_names[7]: mark,
},
fetch_list=fetch_targets,
return_numpy=False,
)
print(results[0].recursive_sequence_lengths())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
def main(use_cuda, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
temp_dir = tempfile.TemporaryDirectory()
# Directory for saving the trained model
save_dirname = os.path.join(
temp_dir.name, "label_semantic_roles.inference.model"
)
train(use_cuda, save_dirname, is_local)
infer(use_cuda, save_dirname)
temp_dir.cleanup()
class TestLabelSemanticRoles(unittest.TestCase):
def test_cuda(self):
with self.scope_prog_guard():
main(use_cuda=True)
def test_cpu(self):
with self.scope_prog_guard():
main(use_cuda=False)
@contextlib.contextmanager
def scope_prog_guard(self):
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
if __name__ == '__main__':
unittest.main()
......@@ -113,7 +113,6 @@ if(WIN32)
list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3)
list(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor)
list(REMOVE_ITEM TEST_OPS test_ps_dispatcher)
list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp)
list(REMOVE_ITEM TEST_OPS test_nvprof)
# TODO: Fix these unittests failed on Windows
......@@ -997,13 +996,6 @@ set_tests_properties(test_parallel_executor_transformer PROPERTIES TIMEOUT 120)
set_tests_properties(test_elementwise_div_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150)
set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 120)
if(NOT WIN32)
if(WITH_NV_JETSON)
set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 1200)
else()
set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120)
endif()
endif()
set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120)
set_tests_properties(test_bilateral_slice_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES TIMEOUT
......@@ -1080,7 +1072,6 @@ set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT
120)
set_tests_properties(test_crop_tensor_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_eager_deletion_lstm_net PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_executor_mnist PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_ptb_rnn PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_save_load_v2 PROPERTIES TIMEOUT 120)
......@@ -1124,7 +1115,6 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 250)
set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 250)
set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
from paddle.fluid.core import PassVersionChecker
class FcGruFusePassTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
dict_dim, emb_dim = 128, 64
data = fluid.data(
name='step_data', shape=[None], dtype='int64', lod_level=1
)
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(
input=x,
size=hidden_dim,
bias_attr=True,
origin_mode=False,
is_reverse=True,
)
batch = 16
lod_tensor = fluid.LoDTensor()
lod_tensor.set(
np.random.randint(0, dict_dim, size=[batch]).astype("int64"),
fluid.CPUPlace(),
)
lod_tensor.set_lod([[0, batch]])
self.feeds = {"step_data": lod_tensor}
self.fetch_list = [hidden]
def test_check_output(self):
use_gpu = False
self.check_output_with_option(use_gpu)
self.assertTrue(PassVersionChecker.IsCompatible('fc_gru_fuse_pass'))
class MulGruFusePassTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
dict_dim, emb_dim = 128, 64
data = fluid.data(
name='step_data', shape=[None], dtype='int64', lod_level=1
)
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=emb, size=hidden_dim * 3, bias_attr=False)
hidden = fluid.layers.dynamic_gru(
input=x,
size=hidden_dim,
bias_attr=True,
origin_mode=False,
is_reverse=True,
)
batch = 16
lod_tensor = fluid.LoDTensor()
lod_tensor.set(
np.random.randint(0, dict_dim, size=[batch]).astype("int64"),
fluid.CPUPlace(),
)
lod_tensor.set_lod([[0, batch]])
self.feeds = {"step_data": lod_tensor}
self.fetch_list = [hidden]
def test_check_output(self):
use_gpu = False
self.check_output_with_option(use_gpu)
self.assertTrue(PassVersionChecker.IsCompatible('mul_gru_fuse_pass'))
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
from paddle.fluid.core import PassVersionChecker
class MulLstmFusePassTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
dict_dim, emb_dim = 128, 64
hidden_dim = 512
data = fluid.data(
name='data', shape=[1], dtype='int64', lod_level=1
)
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
x = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False)
forward, cell = fluid.layers.dynamic_lstm(
input=x, size=hidden_dim * 4
)
batch = 16
lod_tensor = fluid.LoDTensor()
lod_tensor.set(
np.random.randint(0, dict_dim, size=[batch]).astype("int64"),
fluid.CPUPlace(),
)
lod_tensor.set_lod([[0, batch]])
self.feeds = {"data": lod_tensor}
self.fetch_list = [forward, cell]
def test_check_output(self):
use_gpu = False
self.check_output_with_option(use_gpu)
self.assertTrue(PassVersionChecker.IsCompatible('mul_lstm_fuse_pass'))
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle
import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def gru_net(
data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=400.0,
):
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr),
)
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
gru_max_tanh = paddle.tanh(gru_max)
fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_cost = paddle.mean(x=cost)
return avg_cost
class GRUTest(TestBase):
def setUp(self):
self.net = gru_net
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle
import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def lstm_net(
data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0,
):
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr),
)
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False
)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = paddle.tanh(lstm_max)
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_cost = paddle.mean(x=cost)
return avg_cost
class LSTMTest(TestBase):
def setUp(self):
self.net = lstm_net
if __name__ == "__main__":
unittest.main()
......@@ -404,21 +404,6 @@ def lm_model(
init_hidden=init_hidden_reshape,
init_cell=init_cell_reshape,
)
elif rnn_model == "cudnn":
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn_out, last_hidden, last_cell = layers.lstm(
x_emb,
init_hidden_reshape,
init_cell_reshape,
num_steps,
hidden_size,
num_layers,
is_bidirec=False,
default_initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
),
)
rnn_out = paddle.transpose(rnn_out, perm=[1, 0, 2])
elif rnn_model == "basic_lstm":
rnn_out, last_hidden, last_cell = basic_lstm(
x_emb,
......
......@@ -18,8 +18,6 @@ import unittest
import numpy as np
from op_test import OpTest
from paddle import fluid
from paddle.fluid import Program, program_guard
from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION
......@@ -267,25 +265,5 @@ class TestGRUOpInference(TestGRUOp):
pass
class TestGruOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_Variable():
input_data = np.random.random((1, 1536)).astype("float32")
fluid.layers.dynamic_gru(input=input_data, size=512)
self.assertRaises(TypeError, test_Variable)
def test_h_0():
in_data = fluid.data(
name="input", shape=[None, 1536], dtype="float32"
)
h = fluid.data(name="h", shape=[None, 512], dtype="int32")
fluid.layers.dynamic_gru(input=in_data, size=512, h_0=h)
self.assertRaises(TypeError, test_h_0)
if __name__ == "__main__":
unittest.main()
......@@ -19,8 +19,6 @@ import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers import gru_unit
class GRUActivationType(OpTest):
......@@ -46,55 +44,6 @@ def relu(x):
return np.maximum(x, 0)
class TestGRUUnitOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
batch_size = 5
hidden_dim = 40
input = fluid.data(
name='input', shape=[None, hidden_dim * 3], dtype='float32'
)
pre_hidden = fluid.data(
name='pre_hidden', shape=[None, hidden_dim], dtype='float32'
)
np_input = np.random.uniform(
-0.1, 0.1, (batch_size, hidden_dim * 3)
).astype('float64')
np_pre_hidden = np.random.uniform(
-0.1, 0.1, (batch_size, hidden_dim)
).astype('float64')
def test_input_Variable():
gru_unit(np_input, pre_hidden, hidden_dim * 3)
self.assertRaises(TypeError, test_input_Variable)
def test_pre_hidden_Variable():
gru_unit(input, np_pre_hidden, hidden_dim * 3)
self.assertRaises(TypeError, test_pre_hidden_Variable)
def test_input_type():
error_input = fluid.data(
name='error_input',
shape=[None, hidden_dim * 3],
dtype='int32',
)
gru_unit(error_input, pre_hidden, hidden_dim * 3)
self.assertRaises(TypeError, test_input_type)
def test_pre_hidden_type():
error_pre_hidden = fluid.data(
name='error_pre_hidden',
shape=[None, hidden_dim],
dtype='int32',
)
gru_unit(input, error_pre_hidden, hidden_dim * 3)
self.assertRaises(TypeError, test_pre_hidden_type)
class TestGRUUnitOp(OpTest):
batch_size = 5
frame_size = 40
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# An NLP model built as a stack of ops operating on LoD tensors. It is a classical test case for the optimization pass.
import unittest
from ir_memory_optimize_net_base import TestIrMemOptBase
import paddle
import paddle.fluid as fluid
def lstm_net(
data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0,
):
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr),
)
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False
)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = paddle.tanh(lstm_max)
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_cost = paddle.mean(x=cost)
return avg_cost
class TestIrMemOptRNN(TestIrMemOptBase):
def setUp(self):
self.network = lstm_net
if __name__ == "__main__":
unittest.main()
......@@ -2593,20 +2593,6 @@ class TestBook(LayerTest):
out = paddle.nn.functional.square_error_cost(input=x, label=y)
return out
def test_dynamic_lstmp(self):
# TODO(minqiyang): dygraph does not support lod yet
with self.static_graph():
hidden_dim, proj_dim = 16, 8
seq_data = layers.data(
name='seq_data', shape=[10, 10], dtype='float32', lod_level=1
)
fc_out = layers.fc(input=seq_data, size=4 * hidden_dim)
self.assertIsNotNone(
layers.dynamic_lstmp(
input=fc_out, size=4 * hidden_dim, proj_size=proj_dim
)
)
def test_lod_reset(self):
# TODO(minqiyang): dygraph does not support lod yet
with self.static_graph():
......
......@@ -20,9 +20,7 @@ import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
random.seed(2)
np.set_printoptions(threshold=np.inf)
......@@ -539,90 +537,5 @@ class TestCUDNNLstmOp(OpTest):
)
@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
class TestCUDNNlstmAPI(unittest.TestCase):
def test_lstm(self):
seq_len = 20
batch_size = 5
hidden_size = 20
dropout_prob = 0.0
num_layers = 1
dtype = 'float32' if core.is_compiled_with_rocm() else 'float64'
input = fluid.data(
name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype
)
init_h = layers.fill_constant(
[num_layers, batch_size, hidden_size], dtype, 0.0
)
init_c = layers.fill_constant(
[num_layers, batch_size, hidden_size], dtype, 0.0
)
rnn_out, last_h, last_c = layers.lstm(
input,
init_h,
init_c,
seq_len,
hidden_size,
num_layers,
dropout_prob,
False,
)
exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
input_i = np.random.uniform(
low=-0.1, high=0.1, size=(seq_len, batch_size, hidden_size)
).astype("float64")
out = exe.run(
fluid.default_main_program(),
feed={'input': input_i},
fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0'],
)
@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
class TestCUDNNlstmAPI(unittest.TestCase): # noqa: F811
def test_lstm(self):
seq_len = 20
batch_size = 5
hidden_size = 20
dropout_prob = 0.0
num_layers = 2
dtype = 'float32' if core.is_compiled_with_rocm() else 'float64'
input = fluid.data(
name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype
)
init_h = layers.fill_constant(
[num_layers, batch_size, hidden_size], dtype, 0.0
)
init_c = layers.fill_constant(
[num_layers, batch_size, hidden_size], dtype, 0.0
)
rnn_out, last_h, last_c = layers.lstm(
input,
init_h,
init_c,
seq_len,
hidden_size,
num_layers,
dropout_prob,
False,
True,
)
exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
input_i = np.random.uniform(
low=-0.1, high=0.1, size=(seq_len, batch_size, hidden_size)
).astype(dtype)
out = exe.run(
fluid.default_main_program(),
feed={'input': input_i},
fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0'],
)
if __name__ == '__main__':
unittest.main()
......@@ -17,11 +17,6 @@ import unittest
import numpy as np
from op_test import OpTest
from paddle import fluid
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers import fill_constant
from paddle.fluid.layers import lstm as LSTM
SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0
......@@ -132,130 +127,6 @@ def lstm(
return hidden, cell
class LstmUnitTestError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
batch_size = 20
seq_len = 100
dropout_prob = 0.2
hidden_size = 150
num_layers = 1
input = fluid.data(
name='input',
shape=[batch_size, seq_len, hidden_size],
dtype='float32',
)
pre_hidden = fill_constant(
[num_layers, batch_size, hidden_size], 'float32', 0.0
)
pre_cell = fill_constant(
[num_layers, batch_size, hidden_size], 'float32', 0.0
)
np_input = np.random.uniform(
-0.1, 0.1, (batch_size, seq_len, hidden_size)
).astype('float64')
np_pre_hidden = np.random.uniform(
-0.1, 0.1, (num_layers, batch_size, hidden_size)
).astype('float64')
np_pre_cell = np.random.uniform(
-0.1, 0.1, (num_layers, batch_size, hidden_size)
).astype('float64')
def test_input_Variable():
LSTM(
np_input,
pre_hidden,
pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_input_Variable)
def test_pre_hidden_Variable():
LSTM(
np_input,
np_pre_hidden,
pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_pre_hidden_Variable)
def test_pre_cell_Variable():
LSTM(
np_input,
pre_hidden,
np_pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_pre_cell_Variable)
def test_input_type():
error_input = fluid.data(
name='error_input',
shape=[None, hidden_size * 3],
dtype='int32',
)
LSTM(
error_input,
pre_hidden,
pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_input_type)
def test_pre_hidden_type():
error_pre_hidden = fluid.data(
name='error_pre_hidden',
shape=[None, hidden_size],
dtype='int32',
)
LSTM(
input,
error_pre_hidden,
pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_pre_hidden_type)
def test_pre_cell_type():
error_pre_cell = fluid.data(
name='error_pre_cell',
shape=[None, hidden_size],
dtype='int32',
)
LSTM(
input,
pre_hidden,
error_pre_cell,
seq_len,
hidden_size,
num_layers,
dropout_prob=dropout_prob,
)
self.assertRaises(TypeError, test_pre_cell_type)
class TestLstmOp(OpTest):
def set_is_test(self):
self.is_test = False
......@@ -374,47 +245,6 @@ class TestLstmOpInference(TestLstmOp):
pass
class TestLstmOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_Variable():
input_data = np.random.random((1, 2048)).astype("float32")
fluid.layers.dynamic_lstm(
input=input_data, size=2048, use_peepholes=False
)
self.assertRaises(TypeError, test_Variable)
def test_h_0():
in_data = fluid.data(
name="input", shape=[None, 2048], dtype="float32"
)
h = fluid.data(name="h", shape=[None, 512], dtype="int32")
c = fluid.data(name="c", shape=[None, 512], dtype="float32")
fluid.layers.dynamic_lstm(
input=in_data, size=2048, use_peepholes=False, h_0=h, c_0=c
)
self.assertRaises(TypeError, test_h_0)
def test_c_0():
in_data_ = fluid.data(
name="input_", shape=[None, 2048], dtype="float32"
)
h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32")
c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32")
fluid.layers.dynamic_lstm(
input=in_data_,
size=2048,
use_peepholes=False,
h_0=h_,
c_0=c_,
)
self.assertRaises(TypeError, test_c_0)
# class TestLstmOpHasInitial(TestLstmOp):
# def set_argument(self):
# self.lod = [[2, 3, 2]]
......
......@@ -17,9 +17,6 @@ import unittest
import numpy as np
import test_lstm_op as LstmTest
from paddle import fluid
from paddle.fluid import Program, program_guard
ACTIVATION = {
'identity': LstmTest.identity,
'sigmoid': LstmTest.sigmoid,
......@@ -378,64 +375,5 @@ class TestLstmpOpLen0Case2(TestLstmpOp):
self.lod = [[2, 0, 3]]
class TestLstmpOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_Variable():
input_data = np.random.random((1, 2048)).astype("float32")
fluid.layers.dynamic_lstmp(
input=input_data,
size=2048,
proj_size=256,
use_peepholes=False,
is_reverse=True,
cell_activation="tanh",
proj_activation="tanh",
)
self.assertRaises(TypeError, test_Variable)
def test_h_0():
in_data = fluid.data(
name="input", shape=[None, 2048], dtype="float32"
)
h = fluid.data(name="h", shape=[None, 512], dtype="int32")
c = fluid.data(name="c", shape=[None, 512], dtype="float32")
fluid.layers.dynamic_lstmp(
input=in_data,
size=2048,
proj_size=256,
use_peepholes=False,
is_reverse=True,
cell_activation="tanh",
proj_activation="tanh",
h_0=h,
c_0=c,
)
self.assertRaises(TypeError, test_h_0)
def test_c_0():
in_data_ = fluid.data(
name="input_", shape=[None, 2048], dtype="float32"
)
h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32")
c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32")
fluid.layers.dynamic_lstmp(
input=in_data_,
size=2048,
proj_size=256,
use_peepholes=False,
is_reverse=True,
cell_activation="tanh",
proj_activation="tanh",
h_0=h_,
c_0=c_,
)
self.assertRaises(TypeError, test_c_0)
if __name__ == '__main__':
unittest.main()
......@@ -17,7 +17,6 @@ import unittest
import numpy as np
import seresnext_net
from fake_reader import fake_imdb_reader
from simple_nets import fc_with_batchnorm, init_data, simple_fc_net
from test_parallel_executor_transformer import (
DeviceType,
......@@ -30,37 +29,6 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
def lstm_net(use_feed):
dict_dim = 5147
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
emb_lr = 30.0
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1
)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr),
)
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False
)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = paddle.tanh(lstm_max)
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_cost = paddle.mean(x=cost)
return avg_cost
def simple_fc_net_with_accuracy(use_feed):
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
......@@ -268,29 +236,6 @@ class TestProgramPruneBackward(unittest.TestCase):
method=transformer, feed_dict=feed_dict, optimizer=optimizer
)
def test_lstm(self):
def optimizer():
optimizer = fluid.optimizer.Adagrad(
learning_rate=0.001,
regularization=fluid.regularizer.L2Decay(1e-4),
)
return optimizer
with self.program_scope_guard():
word_dict_size = 5147
reader = fake_imdb_reader(word_dict_size, 1)
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1
)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
feeder = fluid.DataFeeder(
feed_list=[data, label], place=core.CPUPlace()
)
feed_data = feeder.feed(reader())
self.check_prune_correctness(
method=lstm_net, feed_dict=feed_data, optimizer=optimizer
)
def test_cond(self):
def optimizer():
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
......
......@@ -91,7 +91,6 @@ HIGH_PARALLEL_JOB_NEW = [
'test_seqpool_concat_fuse_pass',
'test_analyzer_save_model',
'test_exception',
'test_fc_lstm_fuse_pass',
'test_similarity_focus_op',
'test_conv_batch_norm_mkldnn_fuse_pass',
'test_sequence_last_step',
......@@ -457,7 +456,6 @@ HIGH_PARALLEL_JOB_NEW = [
'test_spawn_and_init_parallel_env',
'test_fleet_gradient_scale',
'unroll_array_ops_test',
'test_fc_gru_fuse_pass',
'op_version_registry_test',
'test_cudnn_placement_pass',
'cipher_utils_test',
......@@ -1188,7 +1186,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_sigmoid_focal_loss',
'test_manual_seed',
'test_lrn_op',
'test_ir_memory_optimize_nlp',
'test_dataset_dataloader',
'test_complex_variable',
'test_lite_engine',
......@@ -1199,7 +1196,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_elementwise_sub_op',
'test_compare_op',
'test_simnet',
'test_label_semantic_roles',
'test_normal',
'test_tensor_scalar_type_promotion_static',
'test_trt_group_norm_op',
......@@ -1249,7 +1245,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_input_spec',
'test_adam_op',
'test_elementwise_floordiv_op',
'test_eager_deletion_gru_net',
'test_diagonal_op',
'test_imperative_static_runner_mnist',
'test_nearest_interp_op',
......@@ -1468,7 +1463,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_nearest_interp_v2_op',
'test_sequence_slice_op',
'test_program_translator',
'test_eager_deletion_lstm_net',
'malloc_test',
'test_size_op',
'test_analysis_predictor',
......@@ -1906,8 +1900,6 @@ CPU_PARALLEL_JOB = [
'test_fetch_handler',
'test_feed_fetch_method',
'test_fc_mkldnn_op',
'test_fc_lstm_fuse_pass',
'test_fc_gru_fuse_pass',
'test_fc_elementwise_layernorm_fuse_pass_cc',
'test_fc_bf16_mkldnn_op',
'test_executor_feed_non_tensor',
......
......@@ -162,8 +162,6 @@ STATIC_MODE_TESTING_LIST = [
'test_dynrnn_static_input',
'test_eager_deletion_conditional_block',
'test_eager_deletion_delete_vars',
'test_eager_deletion_gru_net',
'test_eager_deletion_lstm_net',
'test_eager_deletion_padding_rnn',
'test_eager_deletion_recurrent_op',
'test_eager_deletion_while_op',
......@@ -586,8 +584,6 @@ STATIC_MODE_TESTING_LIST = [
'test_conv_elementwise_add_act_fuse_pass',
'test_conv_elementwise_add_fuse_pass',
'test_fc_fuse_pass',
'test_fc_gru_fuse_pass',
'test_fc_lstm_fuse_pass',
'test_repeated_fc_relu_fuse_pass',
'test_seqconv_eltadd_relu_fuse_pass',
'test_squared_mat_sub_fuse_pass',
......@@ -683,7 +679,6 @@ STATIC_MODE_TESTING_LIST = [
'test_fleet_rolemaker_new',
'test_fused_fc_elementwise_layernorm_op',
'test_fusion_transpose_flatten_concat_op',
'test_ir_memory_optimize_nlp',
'test_nvprof',
'test_pipeline',
'test_weight_decay',
......