diff --git a/python/paddle/fluid/layers/__init__.py b/python/paddle/fluid/layers/__init__.py index 6ab688cef37ce9a1029337969868ea97b3513b22..b91d7de093c88cb54d5381d9b0df75d773af40bc 100644 --- a/python/paddle/fluid/layers/__init__.py +++ b/python/paddle/fluid/layers/__init__.py @@ -18,8 +18,6 @@ from . import io from .io import * from . import tensor from .tensor import * -from . import control_flow -from .control_flow import * from . import math_op_patch from .math_op_patch import * from .learning_rate_scheduler import * @@ -30,5 +28,4 @@ __all__ = [] __all__ += nn.__all__ __all__ += io.__all__ __all__ += tensor.__all__ -__all__ += control_flow.__all__ __all__ += learning_rate_scheduler.__all__ diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py deleted file mode 100755 index 6d402df9f3cd4e150888d4d46829e13fb30f1b66..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/layers/control_flow.py +++ /dev/null @@ -1,1547 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ..wrapped_decorator import signature_safe_contextmanager - -from .layer_function_generator import templatedoc -from .. import core -from ..framework import ( - Program, - Variable, - Operator, - static_only, - in_dygraph_mode, -) -from ..layer_helper import LayerHelper, unique_name -from ...utils import ( - assert_same_structure, - map_structure, - hold_mutable_vars, - copy_mutable_vars, - is_sequence, - pack_sequence_as, - flatten, - to_sequence, -) -import numpy -import warnings -from functools import reduce, partial -from ..data_feeder import ( - convert_dtype, - check_variable_and_dtype, - check_type, - check_dtype, -) -from ..backward import _infer_var_data_type_shape_ -import paddle -from paddle import _C_ops, _legacy_C_ops - -__all__ = [ - 'Switch', - 'StaticRNN', - 'while_loop', -] - - -def select_output(input, outputs, mask): - """ - **select_output** - This API takes in one input and multiple outputs and an integer mask. It - selects the output specified by the mask and copy the input to selected - output. It is useful in control flow. - - Args: - input(Variable): The input variable - outputs(tuple|list): The output variables - mask(Variable): A tensor containing 1 integer number selecting which - output to be copied with input - - Returns: - Variable: The outputs variables - """ - helper = LayerHelper('select_output', **locals()) - check_type(input, 'input', (Variable), 'select_output') - check_variable_and_dtype(mask, 'mask', ['int32'], 'select_output') - check_type(outputs, 'outputs', (list, tuple), 'select_output') - - helper.append_op( - type='select_output', - inputs={'X': input, 'Mask': mask}, - outputs={'Out': outputs}, - ) - return outputs - - -def _select_input_infer_shape(first_shape, second_shape): - """ - This function infer the output shape by following algorithm: - 1. if the dims is different, raise a error. - 2. 
compare axis one by one: - if a == b: we set axis to a - if a != b: we set axis to -1 - for compatibility, non declarative mode, we just return second_shape. - """ - if len(first_shape) != len(second_shape): - warnings.warn( - f"the input shapes of select_input should have the same rank, but get {first_shape}, {second_shape}" - ) - return second_shape - out_shape = list( - map(lambda a, b: a if a == b else -1, first_shape, second_shape) - ) - return out_shape - - -def select_input(inputs, mask): - """ - **select_input** - - This API takes in multiple inputs and uses an integer mask to select one - input to output. It is useful in control flow. - - Args: - inputs(tuple|list): The input variables - mask(Variable): A tensor containing 1 integer number selecting which - input to output - - Returns: - Variable: The selected input variable - """ - helper = LayerHelper('select_input', **locals()) - check_type(inputs, 'inputs', (list, tuple), 'select_input') - check_variable_and_dtype(mask, 'mask', ['int32'], 'select_input') - - # Select input should expand the shape. If it is - 1 and valid number, use - 1 first. If the dim is different, an error will be reported directly - # assert inputs[0].dtype == inputs[1].dtype, f"Expect the inputs should have the same dtype, but get {inputs[0].dtype} and {inputs[1].dtype}" - - output_shape = _select_input_infer_shape(inputs[0].shape, inputs[1].shape) - output_dtype = inputs[1].dtype - output_type = inputs[1].type - - out = helper.create_variable( - dtype=output_dtype, shape=output_shape, type=output_type - ) - helper.append_op( - type='select_input', - inputs={'X': inputs, 'Mask': mask}, - outputs={'Out': out}, - ) - return out - - -# (TODO: Mine) There exists dependency. It will be removed later. -class BlockGuard: - """ - BlockGuard class. - - BlockGuard class is used to create a sub-block in a program by - using the Python `with` keyword. - """ - - def __init__(self, main_program): - if not isinstance(main_program, Program): - raise TypeError("BlockGuard takes a program") - self.main_program = main_program - - def __enter__(self): - self.main_program._create_block() - - def __exit__(self, exc_type, exc_val, exc_tb): - self.main_program._rollback() - if exc_type is not None: - return False # re-raise exception - return True - - -# (TODO: Mine) There exists dependency. It will be removed later. -class BlockGuardWithCompletion(BlockGuard): - """ - BlockGuardWithCompletion class. - - BlockGuardWithCompletion class is used to create an op with a block in a program. - """ - - def __init__(self, rnn): - if not isinstance(rnn, StaticRNN): - raise TypeError("BlockGuardWithCompletion takes a StaticRNN") - super().__init__(rnn.helper.main_program) - self.rnn = rnn - - def __enter__(self): - self.rnn.status = StaticRNN.IN_RNN_BLOCK - return super().__enter__() - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is not None: - return False - self.rnn.status = StaticRNN.AFTER_RNN_BLOCK - self.rnn._complete_op() - return super().__exit__(exc_type, exc_val, exc_tb) - - -class StaticRNNMemoryLink: - """ - StaticRNNMemoryLink class. - - StaticRNNMemoryLink class is used to create a link between two - memory cells of a StaticRNN. - - - NOTE: This is a internal data structure of a very low-level API. - Please use StaticRNN instead. - - Args: - init(Variable): the initial variable for Memory. - pre_mem(Variable): the memory variable in previous time step. - mem(Variable): the memory variable in current time step. 
- """ - - def __init__(self, init, pre_mem, mem=None): - self.init = init - self.pre_mem = pre_mem - self.mem = mem - - -class StaticRNN: - """ - :api_attr: Static Graph - - StaticRNN class. - - The StaticRNN can process a batch of sequence data. The first dimension of inputs - represents sequence length, the length of each input sequence must be equal. - StaticRNN will unfold sequence into time steps, user needs to define how to process - each time step during the :code:`with` step. - - Args: - name (str, optional): Please refer to :ref:`api_guide_Name`, Default None. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # create prev memory parameter, batch size comes from word - prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # use hidden to update prev - rnn.update_memory(prev, hidden) - # mark hidden as output - rnn.step_output(hidden) - # get StaticrNN final output - result = rnn() - - """ - - BEFORE_RNN_BLOCK = 0 - IN_RNN_BLOCK = 1 - AFTER_RNN_BLOCK = 2 - - def __init__(self, name=None): - check_type(name, "name", (str, type(None)), "fluid.layers.StaticRNN") - self.helper = LayerHelper("static_rnn", name=name) - self.memories = {} # memory map, from pre_mem.name --> MemoryLink - self.inputs = [] # input variable list in current block - self.outputs = [] # output variable list in parent block - self.status = StaticRNN.BEFORE_RNN_BLOCK # status flag. - # sequence length, since it is a static RNN, sequence length are fixed. - self.seq_len = None - - def step(self): - """ - Define operators in each step. step is used in :code:`with` block, OP in :code:`with` block - will be executed sequence_len times (sequence_len is the length of input) - """ - return BlockGuardWithCompletion(self) - - def _assert_in_rnn_block_(self, method): - if self.status != StaticRNN.IN_RNN_BLOCK: - raise ValueError("You must invoke {0} in rnn block".format(method)) - - def memory( - self, - init=None, - shape=None, - batch_ref=None, - init_value=0.0, - init_batch_dim_idx=0, - ref_batch_dim_idx=1, - ): - """ - Create a memory variable for static rnn. - If the :code:`init` is not None, :code:`memory` will be initialized by - this Variable. If the :code:`init` is None, :code:`shape` and :code:`batch_ref` - must be set, and this function will create a new variable with shape and batch_ref - to initialize :code:`init` Variable. - - Args: - init(Variable, optional): Tensor used to init memory. If it is not set, - :code:`shape` and :code:`batch_ref` must be provided. - Default: None. - shape(list|tuple): When :code:`init` is None use this arg to initialize memory shape. - NOTE the shape does not contain batch_size. Default: None. - batch_ref(Variable, optional): When :code:`init` is None, memory's batch size will - be set as batch_ref's ref_batch_dim_idx value. Default: None. - init_value(float, optional): When :code:`init` is None, used to init memory's value. 
Default: 0.0. - init_batch_dim_idx(int, optional): the batch_size axis of the :code:`init` Variable. Default: 0. - ref_batch_dim_idx(int, optional): the batch_size axis of the :code:`batch_ref` Variable. Default: 1. - - Returns: - Variable: The memory variable. - - Examples 1: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # create prev memory parameter, batch size comes from word - prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # use hidden to update prev - rnn.update_memory(prev, hidden) - - - Examples 2: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - boot_memory = paddle.static.data(name='boot', shape=[-1, hidden_size], dtype='float32', lod_level=1) - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # init memory - prev = rnn.memory(init=boot_memory) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # update hidden with prev - rnn.update_memory(prev, hidden) - - """ - self._assert_in_rnn_block_('memory') - check_type( - init, - "init", - (Variable, type(None)), - "fluid.layers.StaticRNN.memory", - ) - check_type( - shape, - "shape", - (list, tuple, type(None)), - "fluid.layers.StaticRNN.memory", - ) - check_type( - batch_ref, - "batch_ref", - (Variable, type(None)), - "fluid.layers.StaticRNN.memory", - ) - if init is None: - if shape is None or batch_ref is None: - raise ValueError( - "if init is None, memory at least need shape and batch_ref" - ) - parent_block = self._parent_block() - var_name = unique_name.generate_with_ignorable_key( - "@".join([self.helper.name, "memory_boot"]) - ) - boot_var = parent_block.create_var( - name=var_name, - shape=shape, - dtype=batch_ref.dtype, - persistable=False, - ) - - parent_block.append_op( - type="fill_constant_batch_size_like", - inputs={'Input': [batch_ref]}, - outputs={'Out': [boot_var]}, - attrs={ - 'value': init_value, - 'shape': boot_var.shape, - 'dtype': boot_var.dtype, - 'input_dim_idx': ref_batch_dim_idx, - 'output_dim_idx': init_batch_dim_idx, - }, - ) - - return self.memory(init=boot_var) - else: - pre_mem = self.helper.create_variable( - name=unique_name.generate_with_ignorable_key( - "@".join([self.helper.name, "mem"]) - ), - dtype=init.dtype, - shape=init.shape, - ) - self.memories[pre_mem.name] = StaticRNNMemoryLink( - init=init, pre_mem=pre_mem - ) - return pre_mem - - def step_input(self, x): - """ - Mark a sequence as a StaticRNN input. 
- - Args: - x(Variable): The input sequence, the shape of x - should be [seq_len, ...]. - - Returns: - Variable: The current time step data in the input sequence. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # create prev memory parameter, batch size comes from word - prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # use hidden to update prev - rnn.update_memory(prev, hidden) - - """ - self._assert_in_rnn_block_('step_input') - check_type(x, "x", Variable, "fluid.layers.StaticRNN.step_input") - if self.seq_len is None: - self.seq_len = x.shape[0] - elif x.shape[0] != -1 and self.seq_len != x.shape[0]: - raise ValueError("Static RNN only take fix seq_len input") - - ipt = self.helper.create_variable( - name=x.name, dtype=x.dtype, shape=list(x.shape[1:]), type=x.type - ) - self.inputs.append(ipt) - return ipt - - def step_output(self, o): - """ - Mark a sequence as a StaticRNN output. - - Args: - o(Variable): The output sequence. - - Returns: - None. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # create prev memory parameter, batch size comes from word - prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # use hidden to update prev - rnn.update_memory(prev, hidden) - rnn.step_output(hidden) - - result = rnn() - - """ - self._assert_in_rnn_block_('step_output') - check_type(o, "o", Variable, "fluid.layers.StaticRNN.step_output") - - tmp_o = self.helper.create_variable_for_type_inference(dtype=o.dtype) - self.helper.append_op( - type='rnn_memory_helper', - inputs={'X': [o]}, - outputs={'Out': tmp_o}, - attrs={'dtype': o.dtype}, - ) - - out_var = self._parent_block().create_var( - name=tmp_o.name, - shape=[self.seq_len] + list(tmp_o.shape), - dtype=tmp_o.dtype, - ) - - self.outputs.append(out_var) - - def output(self, *outputs): - """ - Mark the StaticRNN output variables. - - Args: - outputs: The output Tensor, can mark multiple variables as output - - Returns: - None - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - - vocab_size, hidden_size=10000, 200 - paddle.enable_static() - x = paddle.static.data(name="x", shape=[None, 1, 1], dtype='int64') - # create word sequence - x_emb = layers.embedding( - input=x, - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False) - # transform batch size to dim 1 - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - - rnn = fluid.layers.StaticRNN() - with rnn.step(): - # mark created x_emb as input, each step process a word - word = rnn.step_input(x_emb) - # create prev memory parameter, batch size comes from word - prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word) - hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu') - # use hidden to update prev - rnn.update_memory(prev, hidden) - # mark each step's hidden and word as output - rnn.output(hidden, word) - - result = rnn() - """ - for each in outputs: - self.step_output(each) - - def update_memory(self, mem, var): - """ - Update the memory from :code:`mem` to :code:`var`. - - Args: - mem(Variable): the memory variable. - var(Variable): the plain variable generated in RNN block, used to update memory. - var and mem should have same dims and data type. - - Returns: - None - - """ - check_type(mem, "mem", Variable, "fluid.layers.StaticRNN.update_memory") - check_type(var, "var", Variable, "fluid.layers.StaticRNN.update_memory") - self.memories[mem.name].mem = var - - def _parent_block(self): - prog = self.helper.main_program - parent_idx = prog.current_block().parent_idx - assert parent_idx >= 0 - parent_block = prog.block(parent_idx) - return parent_block - - def __call__(self, *args, **kwargs): - if self.status != StaticRNN.AFTER_RNN_BLOCK: - raise ValueError("RNN output can only be retrieved after rnn block") - if len(self.outputs) == 0: - raise ValueError("RNN has no output") - elif len(self.outputs) == 1: - return self.outputs[0] - else: - return self.outputs - - def _complete_op(self): - main_program = self.helper.main_program - rnn_block = main_program.current_block() - parent_block = self._parent_block() - - local_inputs = set() - - for op in rnn_block.ops: - assert isinstance(op, Operator) - for oname in op.output_names: - for out_var_name in op.output(oname): - local_inputs.add(out_var_name) - - for var in self.inputs: - local_inputs.add(var.name) - for m in self.memories: - local_inputs.add(m) - - # NOTE(zcd): the params have two categories of variables. - # - the variables that are the out of StaticRnn. - # - the variables that are the parameters of some layers, for example, conv2d. - params = list() - for op in rnn_block.ops: - assert isinstance(op, Operator) - for iname in op.input_names: - for in_var_name in op.input(iname): - if in_var_name not in local_inputs: - params.append(in_var_name) - - parameters = [ - parent_block._find_var_recursive(name) for name in set(params) - ] - - step_scope = parent_block.create_var( - type=core.VarDesc.VarType.STEP_SCOPES - ) - - inlinks = [parent_block.var(i.name) for i in self.inputs] - outlinks = self.outputs - - # NOTE(zcd): the states maybe empty in some case. - boot_memories = [] - pre_memories = [] - memories = [] - for _, mem in self.memories.items(): - boot_memories.append(mem.init) - pre_memories.append(mem.pre_mem.name) - assert ( - mem.mem is not None - ), "%s should be updated in every step." 
% (mem.init.name) - mem_var = rnn_block.var(mem.mem.name) - assert isinstance(mem_var, Variable) - new_mem = self.helper.create_variable_for_type_inference( - dtype=mem_var.dtype - ) - rnn_block.append_op( - type='rnn_memory_helper', - inputs={'X': [mem_var]}, - outputs={'Out': [new_mem]}, - attrs={'dtype': mem_var.dtype}, - ) - - memories.append(new_mem.name) - - parent_block.append_op( - type='recurrent', - inputs={ - 'inputs': inlinks, - 'initial_states': boot_memories, - 'parameters': parameters, - }, - outputs={'outputs': outlinks, 'step_scopes': [step_scope]}, - attrs={ - 'has_states': len(pre_memories) > 0, - 'ex_states': pre_memories, - 'states': memories, - 'sub_block': rnn_block, - }, - ) - - -# (TODO: Mine) There exists dependency. It will be removed later. -class WhileGuard(BlockGuard): - def __init__(self, while_op): - if not isinstance(while_op, While): - raise TypeError("WhileGuard takes a while op") - super().__init__(while_op.helper.main_program) - self.while_op = while_op - - def __enter__(self): - self.while_op.status = While.IN_WHILE_BLOCK - return super().__enter__() - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is not None: - return False - self.while_op.status = While.AFTER_WHILE_BLOCK - self.while_op._complete() - return super().__exit__(exc_type, exc_val, exc_tb) - - -# (TODO: Mine) There exists dependency. It will be removed later. -def get_inputs_outputs_in_block( - current_block, inner_inputs, inner_outputs, helper -): - """ - Find inputs and outputs in current control flow block. - :param current_block: Current control flow block. - :param inner_inputs: Input var name of ops in current block. - :param inner_outputs: Output var name of ops in current block. - :return: inner_inputs, inner_outputs - """ - - def is_ignore_vars(op, var_name): - # NOTE(dev): There are some persistable var created in some non-standard API - # such as "contrib.layers.shuffle_batch". It create a "Seed" used both in - # Input and Output. This var shall not be considered as a loop_var in - # control_flow. - IGNORE_VAR_NAMES = {"shuffle_batch": ["shuffle_batch_seed"]} - if op.type in IGNORE_VAR_NAMES: - var_names = IGNORE_VAR_NAMES[op.type] - for name in var_names: - if name in var_name: - return True - return False - - # Step1: update inner_inputs and inner_outputs - # NOTE: Here assumes that all variables are input or output of Ops, - # but some variables are created without appendding a real op. - # For example, in `arr = create_array(dtype)`, `arr` is not a output of a op. - for op in current_block.ops: - assert isinstance(op, Operator) - for iname in op.input_names: - for in_var_name in op.input(iname): - if in_var_name not in inner_outputs and not is_ignore_vars( - op, in_var_name - ): - inner_inputs.add(in_var_name) - - for oname in op.output_names: - for out_var_name in op.output(oname): - inner_outputs.add(out_var_name) - - # Step2: Remove LOD_TENSOR_ARRAY created in current control flow block. 
- remove_inner_inputs = set() - parent_block = helper.main_program.block(current_block.parent_idx) - - for in_var_name in inner_inputs: - parent_block_var = parent_block._find_var_recursive(in_var_name) - current_block_var = None - if current_block.has_var(in_var_name): - current_block_var = current_block.var(in_var_name) - if ( - not parent_block_var - and current_block_var - and current_block_var.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY - ): - remove_inner_inputs.add(in_var_name) - - inner_inputs = inner_inputs - remove_inner_inputs - - return inner_inputs, inner_outputs - - -# (TODO: Mine) There exists dependency. It will be removed later. -class While: - """ - :api_attr: Static Graph - - while loop control flow. Repeat while body until cond is False. - - Note: - A new OP :ref:`api_fluid_layers_while_loop` is highly recommended instead of ``While`` if the shape of parameter ``cond`` is [1]. - OP :ref:`api_fluid_layers_while_loop` is easier to use and is called with less code but does the same thing as ``While`` . - - Notice: - Local variables created in ``While`` are similar to that created in while of C++, and cannot be referenced externally. - As a result, they cannot be obtained through ``fetch_list`` of ``Executor``. If you would like to access the variable - out of ``while`` , PaddlePaddle provides ``assign`` API to assign local variables to external. Please refer to example - code 2 or refer to `issue#22724 `_. - - Args: - cond(Variable): A Tensor whose data type is bool controlling whether to continue looping. - is_test(bool, optional): A flag indicating whether execution is in test phase. Default value is False. - name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . - - Examples 1: - .. code-block:: python - - import paddle.fluid as fluid - import paddle - import numpy as np - - i = paddle.full(shape=[1], dtype='int64', fill_value=0) # loop counter - - loop_len = paddle.full(shape=[1],dtype='int64', fill_value=10) # loop length - - cond = paddle.less_than(x=i, y=loop_len) - while_op = fluid.layers.While(cond=cond) - with while_op.block(): - i = paddle.increment(x=i, value=1) - paddle.assign(paddle.less_than(x=i, y=loop_len), cond) - - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - res = exe.run(fluid.default_main_program(), feed={}, fetch_list=[i]) - print(res) # [array([10])] - - - Examples 2: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - import numpy as np - - paddle.enable_static() - i = paddle.full(shape=[1], dtype='int64', fill_value=0) - loop_len = paddle.full(shape=[1], dtype='int64', fill_value=10) - one = paddle.full(shape=[1], dtype='float32', fill_value=1) - data = paddle.static.data(name='data', shape=[1], dtype='float32') - sums = paddle.full(shape=[1], dtype='float32', fill_value=0) # Define the variable to be obtained ouside of While, which name should be different from the variable inside the While to be obtained - - cond = paddle.less_than(x=i, y=loop_len) - while_op = fluid.layers.While(cond=cond) - with while_op.block(): - sums_tensor = paddle.add(x=data, y=data) - fluid.layers.assign(sums_tensor, sums) # Update the value of sums_tensor defined in While to the sums which defined outside of While through layers.assign - i = paddle.increment(x=i, value=1) - data = paddle.add(x=data, y=one) - paddle.assign(paddle.less_than(x=i, y=loop_len), cond) - - feed_data = np.ones(1).astype('float32') - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - res = exe.run(fluid.default_main_program(), feed={'data': feed_data}, fetch_list=sums) - print(res[0]) # [2.] # Because the data in While does not update the value outside the While, the value of sums is [2.] after the loop - """ - - BEFORE_WHILE_BLOCK = 0 - IN_WHILE_BLOCK = 1 - AFTER_WHILE_BLOCK = 2 - - def __init__(self, cond, is_test=False, name=None): - self.helper = LayerHelper("while", name=name) - self.status = While.BEFORE_WHILE_BLOCK - check_variable_and_dtype(cond, 'cond', ['bool'], 'fluid.layers.While') - if reduce(lambda a, b: a * b, cond.shape, 1) != 1: - raise TypeError( - "condition expected shape as [1], but given shape as {0}.".format( - list(cond.shape) - ) - ) - self.cond_var = cond - self.is_test = is_test - - def block(self): - return WhileGuard(self) - - def _complete(self): - main_program = self.helper.main_program - while_block = main_program.current_block() - parent_block = main_program.block( - main_program.current_block().parent_idx - ) - - inner_outputs = {self.cond_var.name} - x_name_list = set() - x_name_list, inner_outputs = get_inputs_outputs_in_block( - while_block, x_name_list, inner_outputs, self.helper - ) - - out_vars = [] - for inner_out_name in inner_outputs: - inner_var = parent_block._find_var_recursive(inner_out_name) - if inner_var: - out_vars.append(inner_var) - - x_name_list |= set(map(lambda x: x.name, out_vars)) - # NOTE(dev): cond_var has been contained in Input('Condition'), so - # we remove it from Input('X') - x_name_list -= {self.cond_var.name} - - step_scope = parent_block.create_var( - type=core.VarDesc.VarType.STEP_SCOPES - ) - - parent_block.append_op( - type='while', - inputs={ - 'X': [ - parent_block._var_recursive(x_name) - for x_name in x_name_list - ], - 'Condition': [self.cond_var], - }, - outputs={'Out': out_vars, 'StepScopes': [step_scope]}, - attrs={'sub_block': while_block, "is_test": self.is_test}, - ) - - -support_ret_buildin_type = (bool, float, int) - - -# (TODO: Mine) There exists dependency. It will be removed later. -def assign_skip_lod_tensor_array(input, output): - """ - Assign input to output, but skip the process of copying LoDTensorArray unless it's created in while_block. 
- """ - - def has_shape_diff(x_var, y_var): - if len(x_var.shape) != len(y_var.shape): - return True - for x_dim, y_dim in zip(x_var.shape, y_var.shape): - if x_dim != y_dim and -1 not in [x_dim, y_dim]: - return True - return False - - if not isinstance(input, (Variable, core.eager.Tensor)): - if isinstance(output, Variable) and isinstance( - input, support_ret_buildin_type - ): - paddle.assign(input, output) - else: - output = input - return - - if input.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: - main_program = input.block.program - parent_block = main_program.block( - main_program.current_block().parent_idx - ) - if parent_block and not parent_block._find_var_recursive(input.name): - paddle.assign(input, output) - else: - if ( - isinstance(output, Variable) - and isinstance(input, Variable) - and has_shape_diff(input, output) - ): - warnings.warn( - "In dy2static mode, we attemp to assign a variable with shape {} into a variable with shape{}, which is not always right.".format( - input.shape, output.shape - ) - ) - paddle.assign(input, output) - - -# (TODO: Mine) There exists dependency (jit.dy2static.convert_operators). It will be removed later. -def while_loop(cond, body, loop_vars, is_test=False, name=None): - """ - :api_attr: Static Graph - - while_loop is one of the control flows. Repeats while_loop `body` until `cond` returns False. - - Notice: - Local variables defined in ``body`` cannot be obtained through ``fetch_list`` of ``Executor`` , variables should - be defined outside ``body`` and placed in ``loop_vars`` for looping, then these variables can be fetched by ``fetch_list`` . - - Args: - cond(Callable): A callable returning a boolean tensor controlling whether to continue looping. And ``cond`` takes - as many arguments as ``loop_vars`` . - body(Callable): A callable returning a tuple or list of tensors or LoDTensorArrays of the same arity - (length and structure) and types as ``loops_vars`` . And ``body`` takes as many arguments as ``loop_vars`` . - loop_vars(list|tuple): A list or tuple of tensors or LoDTensorArrays that is passed to both ``cond`` and ``body`` . - is_test(bool, optional): A flag indicating whether execution is in test phase. Default value is False. - name(str, optional): Normally there is no need for users to set this property. For more information, please - refer to :ref:`api_guide_Name`. Default is None. - - Returns: - A list or tuple of Tensors or LoDTensorArrays which returned by ``body`` . - - Examples: - .. 
code-block:: python - - import paddle - paddle.enable_static() - - def cond(i, ten): - return i < ten - - def body(i, ten): - i = i + 1 - return [i, ten] - - main_program = paddle.static.default_main_program() - startup_program = paddle.static.default_startup_program() - with paddle.static.program_guard(main_program, startup_program): - i = paddle.full(shape=[1], fill_value=0, dtype='int64') # loop counter - ten = paddle.full(shape=[1], fill_value=10, dtype='int64') # loop length - i, ten = paddle.static.nn.while_loop(cond, body, [i, ten]) - - exe = paddle.static.Executor(paddle.CPUPlace()) - res = exe.run(main_program, feed={}, fetch_list=[i]) - print(res) # [array([10])] - """ - helper = LayerHelper('while_loop', **locals()) - - if not callable(cond): - raise TypeError("cond in while_loop should be callable") - if not callable(body): - raise TypeError("body in while_loop should be callable") - check_type(loop_vars, 'loop_vars', (list, tuple), 'fluid.layers.while_loop') - if len(loop_vars) == 0: - raise ValueError("loop_vars in while_loop should not be empty") - - pre_cond = cond(*loop_vars) - - if reduce(lambda a, b: a * b, pre_cond.shape, 1) != 1: - raise TypeError( - "the shape of the variable returned by cond should be [1]," - "but given shape as {0}.".format(list(pre_cond.shape)) - ) - - if in_dygraph_mode(): - now_cond = pre_cond.item() - while now_cond: - output_vars = body(*loop_vars) - if not isinstance(output_vars, (list, tuple)): - output_vars = [output_vars] - if len(output_vars) != len(loop_vars): - raise ValueError( - "body in while_loop should return the same arity " - "(length and structure) and types as loop_vars" - ) - now_cond = cond(*output_vars).item() - map_structure(assign_skip_lod_tensor_array, output_vars, loop_vars) - return loop_vars - else: - check_variable_and_dtype( - pre_cond, - 'var of cond returned', - ['bool'], - 'fluid.layers.while_loop', - ) - while_loop_block = While(pre_cond, is_test, name) - has_mutable_vars_in_loop = hold_mutable_vars(loop_vars) - with while_loop_block.block(): - # If a variable with mutable type is included in loop_vars, like `dict/list`, - # modifying it in the body function will cause origin variable to be modified - # synchronously. This will raise an assignment error out of while block. - # Here we make a copy of the mutable vars to avoid this problem. - if has_mutable_vars_in_loop: - new_loop_vars = copy_mutable_vars(loop_vars) - output_vars = body(*new_loop_vars) - else: - output_vars = body(*loop_vars) - if not isinstance(output_vars, (list, tuple)): - output_vars = [output_vars] - try: - loop_vars = _deal_with_undefined_var(output_vars, loop_vars) - assert_same_structure(output_vars, loop_vars, check_types=False) - except ValueError as e: - raise ValueError( - "body in while_loop should return the same arity " - "(length and structure) as loop_vars: {0}".format(e) - ) - now_cond = cond(*output_vars) - map_structure(assign_skip_lod_tensor_array, output_vars, loop_vars) - paddle.assign(now_cond, pre_cond) - return loop_vars - - -# (TODO: Mine) There exists dependency. It will be removed later. -def _deal_with_undefined_var(output_vars, loop_vars): - """Deal with undefined var cases, We create undefined variable based on the results of body(). - In Dy2Static, we use undefined var to represent the var created in control flow. This function - expand the loop_vars and replace original loop_vars. - 1. UndefinedVar = Variable # create a variable - 2. UndefinedVar = None # create a undefined var with RETURN_NO_VALUE_MAGIC_NUM - 3. 
UndefinedVar = List(int) # create a list of variable - 4. UndefinedVar = value # create a variable - """ - from paddle.jit.dy2static.utils import ( - UndefinedVar, - create_undefined_variable, - ) - - def create_var_like(o_var): - if ( - isinstance(o_var, (Variable,) + support_ret_buildin_type) - or o_var is None - ): - return create_undefined_variable() - if is_sequence(o_var): - """ - Create a complex container class inside the body of while, including Python list and python Dict - """ - return map_structure(lambda x: create_undefined_variable(), o_var) - - if len(output_vars) != len(loop_vars): - raise ValueError("The length of loop_vars should be the same.") - - results = [] - for o_var, l_var in zip(output_vars, loop_vars): - if isinstance(l_var, UndefinedVar) or l_var is None: - results.append(create_var_like(o_var)) - else: - results.append(l_var) - return results - - -class ConditionalBlockGuard(BlockGuard): - """ - ConditionalBlockGuard is derived from BlockGuard. It is dedicated for - holding a ConditionalBlock, and helping users entering and exiting the - ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard - is generally an internal component of IfElse, users should not use it directly. - """ - - def __init__(self, block): - check_type(block, "block", ConditionalBlock, "ConditionalBlockGuard") - super().__init__(block.helper.main_program) - self.block = block - - def __enter__(self): - return super().__enter__() - - def __exit__(self, exc_type, exc_val, exc_tb): - self.block.complete() - return super().__exit__(exc_type, exc_val, exc_tb) - - -class ConditionalBlock: - ''' - **ConditionalBlock** - - ConditionalBlock is an operator that bind a block to a specific condition, - if the condition matches, the corresponding block will be executed. - - Args: - inputs (Variable): bool conditions. - is_scalar_condition (bool): whether the branch is controlled by a scalar. - name(str): name of this ConditionalBlock. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - cond = paddle.less_than(x=label, y=limit) - true_image, false_image = layers.split_lod_tensor( - input=image, mask=cond) - true_cond = layers.ConditionalBlock([true_image]) - - with true_cond.block(): - ... - with false_cond.block(): - ... 
- ''' - - def __init__(self, inputs, is_scalar_condition=False, name=None): - for each_input in inputs: - check_type(each_input, "input", Variable, "ConditionalBlock") - self.inputs = inputs - self.is_scalar_condition = is_scalar_condition - self.helper = LayerHelper('conditional_block', name=name) - - def block(self): - return ConditionalBlockGuard(self) - - def complete(self): - inside_block = self.helper.main_program.current_block() - parent_block = self.helper.main_program.block(inside_block.parent_idx) - - intermediate = set() - params = set() - params, intermediate = get_inputs_outputs_in_block( - inside_block, params, intermediate, helper=self.helper - ) - - # Todo(liym27) Here assume that all params are in recursive parent block - # but when minimize() called in control flow, some params may be in - # conditional grad block - param_list = [ - parent_block._var_recursive(each_name) for each_name in params - ] - - out_list = [] - for inner_out_name in intermediate: - inner_var = parent_block._find_var_recursive(inner_out_name) - if inner_var: - out_list.append(inner_var) - - step_scope = parent_block.create_var( - type=core.VarDesc.VarType.STEP_SCOPES - ) - conditional_block_op = parent_block.append_op( - type='conditional_block', - inputs={ - 'Cond': self.inputs, - 'Input': param_list, - }, - outputs={'Out': out_list, 'Scope': [step_scope]}, - attrs={ - 'sub_block': inside_block, - 'is_scalar_condition': self.is_scalar_condition, - }, - ) - - if self.need_append_conditional_block_grad(inside_block): - self.append_conditional_block_grad( - parent_block, inside_block, conditional_block_op - ) - - def need_append_conditional_block_grad(self, inside_block): - grad_sub_block_idx = inside_block.backward_block_idx - inside_block_idx = inside_block.idx - - # if inside_block have grad_block and grad_block is not itself, - # we will append conditional block grad. - return ( - grad_sub_block_idx != -1 and grad_sub_block_idx != inside_block_idx - ) - - def append_conditional_block_grad( - self, parent_block, inside_block, conditional_block_op - ): - ''' - Append op `conditional_block_grad` manually. - When `optimizer.minimize/append_backward` is called in Paddle control flow, - grad ops will be appended before appending op `conditional_block` so that - op `conditional_block_grad` can't be appended when calling - `optimizer.minimize/append_backward`. After appending op `conditional_block`, - `conditional_block_grad` is appended manually. - - Args: - parent_block (Block): The block that `conditional_block_op` blongs to. - inside_block (Block): The sub block of `conditional_block_op`. - conditional_block_op (Operator): The forward op conditional_block. 
- ''' - - grad_sub_block_idx = inside_block.backward_block_idx - grad_sub_block = self.helper.main_program.block(grad_sub_block_idx) - - intermediate = set() - params = set() - - for each_op in grad_sub_block.ops: - assert isinstance(each_op, Operator) - for iname in each_op.input_names: - for in_var_name in each_op.input(iname): - if in_var_name not in intermediate: - params.add(in_var_name) - - for oname in each_op.output_names: - for out_var_name in each_op.output(oname): - intermediate.add(out_var_name) - - param_list = [] - for inner_input_name in params: - inner_var = parent_block._find_var_recursive(inner_input_name) - if inner_var: - param_list.append(inner_var.name) - - grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - conditional_block_op.desc, set(), [grad_sub_block.desc] - ) - - # append op_desc in grad_op_descs to target_block - op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() - backward = core.op_proto_and_checker_maker.OpRole.Backward - new_op_desc = parent_block.desc.append_op() - new_op_desc.copy_from(grad_op_desc[0]) - new_op_desc._set_attr(op_role_attr_name, backward) - # set input and output manually - new_op_desc.set_input('Input', param_list) - new_op_desc.set_output( - 'Input@GRAD', [param + "@GRAD" for param in param_list] - ) - - new_vars = set() - for grad_var_name in new_op_desc.output_arg_names(): - if ( - grad_sub_block.desc.has_var_recursive(grad_var_name.encode()) - or grad_var_name == core.empty_var_name() - ): - continue - grad_sub_block.desc.var(grad_var_name.encode()) - new_vars.add(grad_var_name) - if grad_var_name not in op_grad_to_var: - continue - - # infer_shape and infer_type - new_op_desc.infer_var_type(grad_sub_block.desc) - new_op_desc.infer_shape(grad_sub_block.desc) - - for arg in new_op_desc.output_arg_names(): - if arg in new_vars: - _infer_var_data_type_shape_(arg, grad_sub_block) - - self.helper.main_program._sync_with_cpp() - - -def _to_sequence_except_dict(x): - """ - In this function, dict is not viewed as sequence. - """ - if isinstance(x, dict): - return [x] - return to_sequence(x) - - -def _is_sequence_except_dict(x): - """ - In this function, dict is not viewed as sequence. - """ - if isinstance(x, dict): - return False - return is_sequence(x) - - -def expand_undefined_var(nest1, nest2, names): - """TODO: make this function recursively. - nest1: Var1, (UndefinedVar, [1,2,3]) - nest2: Var2, ([1,2,3,4], UndefinedVar) - In this case, we should not expand recursively. - """ - from paddle.jit.dy2static.utils import UndefinedVar - from paddle.jit.dy2static.return_transformer import ( - RETURN_VALUE_PREFIX, - ) - - def pack_undefined_var_as(seq): - return pack_sequence_as( - seq, [UndefinedVar("padding") for i in flatten(seq)] - ) - - def map_fn(n1, n2, name, order): - if not name.startswith(RETURN_VALUE_PREFIX) and ( - isinstance(n1, UndefinedVar) or n1 is None - ): - if n1 is None and n2 is not None: - if order == 0: - warnings.warn( - "In cond : Var '{}' or part of it is set differently in ifelse branchs, " - "<{}, {}> in true branch and <{}, {}> in false branch. Set var to " - "'None' in ifelse block might lead to error.".format( - name, type(n1), n1, type(n2), n2 - ) - ) - else: - warnings.warn( - "In cond : Var '{}' or part of it is set differently in ifelse branchs, " - "<{}, {}> in true branch and <{}, {}> in false branch. 
Set var to " - "'None' in ifelse block might lead to error.".format( - name, type(n2), n2, type(n1), n1 - ) - ) - return pack_undefined_var_as(n2) - return n1 - - nest1_out = list( - map( - map_fn, - _to_sequence_except_dict(nest1), - _to_sequence_except_dict(nest2), - _to_sequence_except_dict(names), - [0 for i in _to_sequence_except_dict(names)], - ) - ) - nest2_out = list( - map( - map_fn, - _to_sequence_except_dict(nest2), - _to_sequence_except_dict(nest1), - _to_sequence_except_dict(names), - [1 for i in _to_sequence_except_dict(names)], - ) - ) - if not _is_sequence_except_dict(nest1): - nest1_out = nest1_out[0] - if not _is_sequence_except_dict(nest2): - nest2_out = nest2_out[0] - return nest1_out, nest2_out - - -# TODO: It will be deleted later. -class Switch: - """ - :api_attr: Static Graph - - This class is used to implement Switch branch control function. - Switch branch contains several case branches and one default branch. - Switch control flow checks whether the case branch conditions are satisfied in turn, - and only executes the statement after the first case branch that satisfies the conditions. - If there is no case branch that satisfies the condition, - only the statement following the default branch is executed. - - Note: - A new OP :ref:`api_fluid_layers_case` is highly recommended instead of ``Switch`` if the shape of parameter ``cond`` is [1]. - OP :ref:`api_fluid_layers_case` is easier to use and is called with less code but does the same thing as ``Switch`` . - - Member Functions: - case(condition): The case branch of Switch whose parameter cond is a scalar Variable of bool type. Only if the cond of the current case branch is True and the cond of the previous case branch is False, the statement after the case branch will be executed, and the statement after the case branch will not be executed. - - default(): The default branch of Switch. When cond of all case branches is False, the statement after default branch is executed. - - Case and default functions can only be used inside the scope of Switch, as shown below: - - .. code-block:: python - - ''' - import paddle - import paddle.fluid as fluid - with fluid.layers.Switch() as switch: - with switch.case(cond1): - i = paddle.full(shape=[1], dtype='int64', fill_value=1) - with switch.case(cond2): - i = paddle.full(shape=[1], dtype='int64', fill_value=2) - with switch.default(): - i = paddle.full(shape=[1], dtype='int64', fill_value=0) - ''' - - Args: - name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - - lr = paddle.static.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - zero_var = paddle.full( - shape=[1], dtype='float32', fill_value=0.0) - one_var = paddle.full( - shape=[1], dtype='float32', fill_value=1.0) - two_var = paddle.full( - shape=[1], dtype='float32', fill_value=2.0) - - global_step = fluid.layers.autoincreased_step_counter(counter_name='@LR_DECAY_COUNTER@', begin=0, step=1) - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(global_step == zero_var): - paddle.assign(input=one_var, output=lr) - with switch.default(): - paddle.assign(input=two_var, output=lr) - - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - res = exe.run(fluid.default_main_program(), feed={}, fetch_list=[lr]) - print(res) # [array([1.], dtype=float32)] - """ - - def __init__(self, name=None): - self.helper = LayerHelper('switch', name=name) - self.inside_scope = False - self.pre_not_conditions = [] - - def case(self, condition): - if not self.inside_scope: - raise ValueError("case should be called inside with") - - check_variable_and_dtype( - condition, - 'condition', - ['bool'], - 'the member function case of fluid.layers.Switch', - ) - - if len(self.pre_not_conditions) == 0: - cond_block = ConditionalBlock([condition], is_scalar_condition=True) - not_cond = paddle.logical_not(x=condition) - self.pre_not_conditions.append(not_cond) - else: - pre_cond_num = len(self.pre_not_conditions) - pre_not_cond = self.pre_not_conditions[pre_cond_num - 1] - new_not_cond = paddle.logical_and( - x=pre_not_cond, y=paddle.logical_not(x=condition) - ) - self.pre_not_conditions.append(new_not_cond) - cond_block = ConditionalBlock( - [paddle.logical_and(x=pre_not_cond, y=condition)], - is_scalar_condition=True, - ) - - return ConditionalBlockGuard(cond_block) - - def default(self): - pre_cond_num = len(self.pre_not_conditions) - if pre_cond_num == 0: - raise ValueError("there should be at least one condition") - cond_block = ConditionalBlock( - [self.pre_not_conditions[pre_cond_num - 1]], - is_scalar_condition=True, - ) - return ConditionalBlockGuard(cond_block) - - def __enter__(self): - """ - set flag that now is inside switch.block {} - :return: - """ - self.inside_scope = True - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.inside_scope = False - if exc_type is not None: - return False # re-raise exception - - return True diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index c59d8ba65336d0ba568c9788d2dddabf1334f55b..050cc774ab7c75f8255a6810f5f134ee1e57dbb9 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -24,7 +24,6 @@ import math import numbers import paddle -from . import control_flow from . import nn from . 
import tensor from ..framework import ( @@ -434,8 +433,7 @@ def piecewise_decay(boundaries, values): persistable=True, name="learning_rate", ) - # TODO: fluid.layers.control_flow.Switch should be replaced by paddle.static.nn.case(or cond) if possible - with control_flow.Switch() as switch: + with paddle.static.nn.control_flow.Switch() as switch: for i in range(len(boundaries)): boundary_val = paddle.tensor.fill_constant( shape=[1], diff --git a/python/paddle/incubate/nn/layer/io.py b/python/paddle/incubate/nn/layer/io.py index 7272e8f6a379761b5b31915a9c40f2f27adb7d1f..f464d93833b2f98fbc2e713ef02f52f3748fe986 100644 --- a/python/paddle/incubate/nn/layer/io.py +++ b/python/paddle/incubate/nn/layer/io.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import paddle from ....fluid.framework import Variable -from ....fluid.layers.control_flow import BlockGuard from ....framework import LayerHelper, core -class BlockGuardServ(BlockGuard): +class BlockGuardServ(paddle.static.nn.control_flow.BlockGuard): """ BlockGuardServ class. diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index b1823785cda5cca4497327f219399277d17c6b55..7b0fbb49fb63965293fa4fb07a9b7d4fc5e38d16 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -21,8 +21,6 @@ from paddle.fluid.dygraph.base import ( in_declarative_mode, ) from paddle.fluid.framework import Variable, core, default_main_program -from paddle.fluid.layers import control_flow -from paddle.fluid.layers.control_flow import while_loop from .utils import ( RETURN_NO_VALUE_VAR_NAME, @@ -181,7 +179,9 @@ def _run_paddle_while( return_name_ids, loop_vars ) # change the non-local var to variable # variable maybe modified to inner var. 
change it into - loop_vars = control_flow.while_loop(new_cond_fn, new_body_fn, loop_vars) + from paddle.static.nn import while_loop + + loop_vars = while_loop(new_cond_fn, new_body_fn, loop_vars) helper.set(return_name_ids, loop_vars) return loop_vars @@ -820,6 +820,8 @@ def _run_paddle_pop(array, *args): new_array = _slice_tensor_array(array, 0, idx) i = idx + 1 + from paddle.static.nn import while_loop + _, new_array = while_loop(cond, body, [i, new_array]) paddle.assign(new_array, output=array) diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 8f0c2f544e1141eec5b66d469f51fe9cd19c46e6..3ef01f836fc6dc66f80891721b86e740bcca3950 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -28,7 +28,6 @@ from paddle.fluid.framework import ( in_dygraph_mode, program_guard, ) -from paddle.fluid.layers import control_flow from paddle.framework import core from paddle.nn import functional as F from paddle.nn import initializer as I @@ -275,7 +274,7 @@ def _rnn_static_graph( end = paddle.cast(end, "int64") cond = start_i < end - while_op = control_flow.While(cond) + while_op = paddle.static.nn.control_flow.While(cond) out_array = paddle.tensor.create_array( dtype=paddle.utils.flatten(inputs)[0].dtype diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 879645612ba4f946c65b1b809a91819aa8936611..8397c16db45a03d317d31f908356e0f3018fb1fe 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -40,7 +40,6 @@ from .common import layer_norm # noqa: F401 from .common import embedding # noqa: F401 from .common import sparse_embedding # noqa: F401 -from ...fluid.layers import StaticRNN # noqa: F401 from .sequence_lod import sequence_conv # noqa: F401 from .sequence_lod import sequence_softmax # noqa: F401 @@ -99,6 +98,5 @@ __all__ = [ # noqa 'sequence_scatter', 'sequence_enumerate', 'sequence_reverse', - 'StaticRNN', 'prelu', ] diff --git a/python/paddle/static/nn/control_flow.py b/python/paddle/static/nn/control_flow.py index c48128d2c083f92ae668f5d49e7afad2896c1bb7..774e21e50d8a6d9607c7de96c54f9060fbddb8e5 100644 --- a/python/paddle/static/nn/control_flow.py +++ b/python/paddle/static/nn/control_flow.py @@ -24,10 +24,8 @@ from paddle.common_ops_import import ( in_dygraph_mode, ) from paddle.fluid import core +from paddle.fluid.backward import _infer_var_data_type_shape_ from paddle.fluid.framework import Operator, Program, Variable, static_only - -# Temporary solution, it will be deleted later -from paddle.fluid.layers.control_flow import ConditionalBlock, select_input from paddle.utils import ( assert_same_structure, copy_mutable_vars, @@ -152,6 +150,198 @@ class WhileGuard(BlockGuard): return super().__exit__(exc_type, exc_val, exc_tb) +class ConditionalBlock: + ''' + **ConditionalBlock** + + ConditionalBlock is an operator that bind a block to a specific condition, + if the condition matches, the corresponding block will be executed. + + Args: + inputs (Variable): bool conditions. + is_scalar_condition (bool): whether the branch is controlled by a scalar. + name(str): name of this ConditionalBlock. + + Examples: + .. code-block:: python + + import paddle + import paddle.fluid as fluid + cond = paddle.less_than(x=label, y=limit) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond) + true_cond = layers.ConditionalBlock([true_image]) + + with true_cond.block(): + ... + with false_cond.block(): + ... 
+ ''' + + def __init__(self, inputs, is_scalar_condition=False, name=None): + for each_input in inputs: + check_type(each_input, "input", Variable, "ConditionalBlock") + self.inputs = inputs + self.is_scalar_condition = is_scalar_condition + self.helper = LayerHelper('conditional_block', name=name) + + def block(self): + return ConditionalBlockGuard(self) + + def complete(self): + inside_block = self.helper.main_program.current_block() + parent_block = self.helper.main_program.block(inside_block.parent_idx) + + intermediate = set() + params = set() + params, intermediate = get_inputs_outputs_in_block( + inside_block, params, intermediate, helper=self.helper + ) + + # Todo(liym27) Here assume that all params are in recursive parent block + # but when minimize() called in control flow, some params may be in + # conditional grad block + param_list = [ + parent_block._var_recursive(each_name) for each_name in params + ] + + out_list = [] + for inner_out_name in intermediate: + inner_var = parent_block._find_var_recursive(inner_out_name) + if inner_var: + out_list.append(inner_var) + + step_scope = parent_block.create_var( + type=core.VarDesc.VarType.STEP_SCOPES + ) + conditional_block_op = parent_block.append_op( + type='conditional_block', + inputs={ + 'Cond': self.inputs, + 'Input': param_list, + }, + outputs={'Out': out_list, 'Scope': [step_scope]}, + attrs={ + 'sub_block': inside_block, + 'is_scalar_condition': self.is_scalar_condition, + }, + ) + + if self.need_append_conditional_block_grad(inside_block): + self.append_conditional_block_grad( + parent_block, inside_block, conditional_block_op + ) + + def need_append_conditional_block_grad(self, inside_block): + grad_sub_block_idx = inside_block.backward_block_idx + inside_block_idx = inside_block.idx + + # if inside_block have grad_block and grad_block is not itself, + # we will append conditional block grad. + return ( + grad_sub_block_idx != -1 and grad_sub_block_idx != inside_block_idx + ) + + def append_conditional_block_grad( + self, parent_block, inside_block, conditional_block_op + ): + ''' + Append op `conditional_block_grad` manually. + When `optimizer.minimize/append_backward` is called in Paddle control flow, + grad ops will be appended before appending op `conditional_block` so that + op `conditional_block_grad` can't be appended when calling + `optimizer.minimize/append_backward`. After appending op `conditional_block`, + `conditional_block_grad` is appended manually. + + Args: + parent_block (Block): The block that `conditional_block_op` blongs to. + inside_block (Block): The sub block of `conditional_block_op`. + conditional_block_op (Operator): The forward op conditional_block. 
+ ''' + + grad_sub_block_idx = inside_block.backward_block_idx + grad_sub_block = self.helper.main_program.block(grad_sub_block_idx) + + intermediate = set() + params = set() + + for each_op in grad_sub_block.ops: + assert isinstance(each_op, Operator) + for iname in each_op.input_names: + for in_var_name in each_op.input(iname): + if in_var_name not in intermediate: + params.add(in_var_name) + + for oname in each_op.output_names: + for out_var_name in each_op.output(oname): + intermediate.add(out_var_name) + + param_list = [] + for inner_input_name in params: + inner_var = parent_block._find_var_recursive(inner_input_name) + if inner_var: + param_list.append(inner_var.name) + + grad_op_desc, op_grad_to_var = core.get_grad_op_desc( + conditional_block_op.desc, set(), [grad_sub_block.desc] + ) + + # append op_desc in grad_op_descs to target_block + op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() + backward = core.op_proto_and_checker_maker.OpRole.Backward + new_op_desc = parent_block.desc.append_op() + new_op_desc.copy_from(grad_op_desc[0]) + new_op_desc._set_attr(op_role_attr_name, backward) + # set input and output manually + new_op_desc.set_input('Input', param_list) + new_op_desc.set_output( + 'Input@GRAD', [param + "@GRAD" for param in param_list] + ) + + new_vars = set() + for grad_var_name in new_op_desc.output_arg_names(): + if ( + grad_sub_block.desc.has_var_recursive(grad_var_name.encode()) + or grad_var_name == core.empty_var_name() + ): + continue + grad_sub_block.desc.var(grad_var_name.encode()) + new_vars.add(grad_var_name) + if grad_var_name not in op_grad_to_var: + continue + + # infer_shape and infer_type + new_op_desc.infer_var_type(grad_sub_block.desc) + new_op_desc.infer_shape(grad_sub_block.desc) + + for arg in new_op_desc.output_arg_names(): + if arg in new_vars: + _infer_var_data_type_shape_(arg, grad_sub_block) + + self.helper.main_program._sync_with_cpp() + + +class ConditionalBlockGuard(BlockGuard): + """ + ConditionalBlockGuard is derived from BlockGuard. It is dedicated for + holding a ConditionalBlock, and helping users entering and exiting the + ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard + is generally an internal component of IfElse, users should not use it directly. + """ + + def __init__(self, block): + check_type(block, "block", ConditionalBlock, "ConditionalBlockGuard") + super().__init__(block.helper.main_program) + self.block = block + + def __enter__(self): + return super().__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + self.block.complete() + return super().__exit__(exc_type, exc_val, exc_tb) + + def get_inputs_outputs_in_block( current_block, inner_inputs, inner_outputs, helper ): @@ -1167,6 +1357,92 @@ def copy_var_to_parent_block(var, layer_helper): return parent_block_var +def select_output(input, outputs, mask): + """ + **select_output** + This API takes in one input and multiple outputs and an integer mask. It + selects the output specified by the mask and copy the input to selected + output. It is useful in control flow. 
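For example, the following sketch routes a tensor into output slot 1 and reads it back with ``select_input``. It is illustrative only and not part of this patch; the zero-filled placeholder outputs are an assumption made for demonstration, not how the framework normally allocates branch outputs.

    .. code-block:: python

        import paddle
        from paddle.static.nn.control_flow import select_input, select_output

        paddle.enable_static()

        x = paddle.full(shape=[2], fill_value=3.0, dtype='float32')
        mask = paddle.full(shape=[1], fill_value=1, dtype='int32')   # choose slot 1

        out0 = paddle.zeros(shape=[2], dtype='float32')
        out1 = paddle.zeros(shape=[2], dtype='float32')
        select_output(x, outputs=[out0, out1], mask=mask)            # copies x into out1

        merged = select_input(inputs=[out0, out1], mask=mask)        # reads slot 1 back

        exe = paddle.static.Executor(paddle.CPUPlace())
        print(exe.run(fetch_list=[merged])[0])                       # [3. 3.]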
+ + Args: + input(Variable): The input variable + outputs(tuple|list): The output variables + mask(Variable): A tensor containing 1 integer number selecting which + output to be copied with input + + Returns: + Variable: The outputs variables + """ + helper = LayerHelper('select_output', **locals()) + check_type(input, 'input', (Variable), 'select_output') + check_variable_and_dtype(mask, 'mask', ['int32'], 'select_output') + check_type(outputs, 'outputs', (list, tuple), 'select_output') + + helper.append_op( + type='select_output', + inputs={'X': input, 'Mask': mask}, + outputs={'Out': outputs}, + ) + return outputs + + +def _select_input_infer_shape(first_shape, second_shape): + """ + This function infer the output shape by following algorithm: + 1. if the dims is different, raise a error. + 2. compare axis one by one: + if a == b: we set axis to a + if a != b: we set axis to -1 + for compatibility, non declarative mode, we just return second_shape. + """ + if len(first_shape) != len(second_shape): + warnings.warn( + f"the input shapes of select_input should have the same rank, but get {first_shape}, {second_shape}" + ) + return second_shape + out_shape = list( + map(lambda a, b: a if a == b else -1, first_shape, second_shape) + ) + return out_shape + + +def select_input(inputs, mask): + """ + **select_input** + + This API takes in multiple inputs and uses an integer mask to select one + input to output. It is useful in control flow. + + Args: + inputs(tuple|list): The input variables + mask(Tensor): A tensor containing 1 integer number selecting which + input to output + + Returns: + Variable: The selected input variable + """ + helper = LayerHelper('select_input', **locals()) + check_type(inputs, 'inputs', (list, tuple), 'select_input') + check_variable_and_dtype(mask, 'mask', ['int32'], 'select_input') + + # Select input should expand the shape. If it is - 1 and valid number, use - 1 first. 
If the dim is different, an error will be reported directly + # assert inputs[0].dtype == inputs[1].dtype, f"Expect the inputs should have the same dtype, but get {inputs[0].dtype} and {inputs[1].dtype}" + + output_shape = _select_input_infer_shape(inputs[0].shape, inputs[1].shape) + output_dtype = inputs[1].dtype + output_type = inputs[1].type + + out = helper.create_variable( + dtype=output_dtype, shape=output_shape, type=output_type + ) + helper.append_op( + type='select_input', + inputs={'X': inputs, 'Mask': mask}, + outputs={'Out': out}, + ) + return out + + def select_input_with_buildin_type(inputs, mask, name): from paddle.jit.dy2static.utils import UndefinedVar from paddle.jit.dy2static.variable_trans_func import to_static_variable @@ -1433,3 +1709,64 @@ def Print( }, ) return output + + +class Switch: + def __init__(self, name=None): + self.helper = LayerHelper('switch', name=name) + self.inside_scope = False + self.pre_not_conditions = [] + + def case(self, condition): + if not self.inside_scope: + raise ValueError("case should be called inside with") + + check_variable_and_dtype( + condition, + 'condition', + ['bool'], + 'the member function case of fluid.layers.Switch', + ) + + if len(self.pre_not_conditions) == 0: + cond_block = ConditionalBlock([condition], is_scalar_condition=True) + not_cond = paddle.logical_not(x=condition) + self.pre_not_conditions.append(not_cond) + else: + pre_cond_num = len(self.pre_not_conditions) + pre_not_cond = self.pre_not_conditions[pre_cond_num - 1] + new_not_cond = paddle.logical_and( + x=pre_not_cond, y=paddle.logical_not(x=condition) + ) + self.pre_not_conditions.append(new_not_cond) + cond_block = ConditionalBlock( + [paddle.logical_and(x=pre_not_cond, y=condition)], + is_scalar_condition=True, + ) + + return ConditionalBlockGuard(cond_block) + + def default(self): + pre_cond_num = len(self.pre_not_conditions) + if pre_cond_num == 0: + raise ValueError("there should be at least one condition") + cond_block = ConditionalBlock( + [self.pre_not_conditions[pre_cond_num - 1]], + is_scalar_condition=True, + ) + return ConditionalBlockGuard(cond_block) + + def __enter__(self): + """ + set flag that now is inside switch.block {} + :return: + """ + self.inside_scope = True + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.inside_scope = False + if exc_type is not None: + return False # re-raise exception + + return True diff --git a/test/legacy_test/test_build_strategy_fusion_group_pass.py b/test/legacy_test/test_build_strategy_fusion_group_pass.py index 30703d6a4a57649daabd7912c3c2c25ea8270027..e635479e9ea45eebfc9656adf1f3f68f03fb7f89 100644 --- a/test/legacy_test/test_build_strategy_fusion_group_pass.py +++ b/test/legacy_test/test_build_strategy_fusion_group_pass.py @@ -16,6 +16,7 @@ import unittest from test_eager_deletion_padding_rnn import PaddingRNNTestBase, RNNConfig +import paddle from paddle import fluid from paddle.fluid import core @@ -36,4 +37,5 @@ class FusionGroupPaddingRNNTest(PaddingRNNTestBase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/test/legacy_test/test_conditional_block.py b/test/legacy_test/test_conditional_block.py index 005d2e50b22e821e98302531573f24c9289fae2c..a5383dd6d10f9a3507917d53feadd1565fcc7eb5 100644 --- a/test/legacy_test/test_conditional_block.py +++ b/test/legacy_test/test_conditional_block.py @@ -19,9 +19,8 @@ import numpy as np import paddle from paddle import fluid from paddle.fluid import core -from paddle.fluid.backward import append_backward -from 
paddle.fluid.executor import Executor -from paddle.fluid.layers.control_flow import ConditionalBlock +from paddle.static import Executor, append_backward +from paddle.static.nn.control_flow import ConditionalBlock class ConditionalBlockTest(unittest.TestCase): @@ -83,4 +82,5 @@ class TestConditionalBlockOpInferShape(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/test/legacy_test/test_eager_deletion_padding_rnn.py b/test/legacy_test/test_eager_deletion_padding_rnn.py index 001a6f33eab1342dcde9d26ba51ab9dd7e4cb40a..29195c3a2fc12bf0976c2f381c7c09cd09d7e75d 100644 --- a/test/legacy_test/test_eager_deletion_padding_rnn.py +++ b/test/legacy_test/test_eager_deletion_padding_rnn.py @@ -21,7 +21,6 @@ import paddle from paddle import fluid from paddle.fluid import layers from paddle.fluid.executor import Executor -from paddle.fluid.layers.control_flow import StaticRNN as PaddingRNN os.environ["CPU_NUM"] = "1" @@ -83,7 +82,7 @@ class RNNConfig: else: raise ValueError('Unsupported model_type.') - if rnn_model not in ('static', 'padding', 'cudnn'): + if rnn_model not in ('static', 'cudnn'): raise ValueError('Unsupported rnn_model.') self.batch_size = 12 @@ -117,124 +116,6 @@ def lm_model( dropout=None, rnn_model='static', ): - def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): - weight_1_arr = [] - weight_2_arr = [] - bias_arr = [] - hidden_array = [] - cell_array = [] - mask_array = [] - for i in range(num_layers): - weight_1 = paddle.create_parameter( - [hidden_size * 2, hidden_size * 4], - dtype="float32", - name="fc_weight1_" + str(i), - default_initializer=paddle.nn.initializer.Uniform( - low=-init_scale, high=init_scale - ), - ) - weight_1_arr.append(weight_1) - bias_1 = paddle.create_parameter( - [hidden_size * 4], - dtype="float32", - name="fc_bias1_" + str(i), - default_initializer=paddle.nn.initializer.Constant(0.0), - ) - bias_arr.append(bias_1) - - pre_hidden = paddle.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1] - ) - pre_cell = paddle.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1] - ) - pre_hidden = paddle.reshape(pre_hidden, shape=[-1, hidden_size]) - pre_cell = paddle.reshape(pre_cell, shape=[-1, hidden_size]) - hidden_array.append(pre_hidden) - cell_array.append(pre_cell) - - input_embedding = paddle.transpose(input_embedding, perm=[1, 0, 2]) - rnn = PaddingRNN() - - with rnn.step(): - input = rnn.step_input(input_embedding) - for k in range(num_layers): - pre_hidden = rnn.memory(init=hidden_array[k]) - pre_cell = rnn.memory(init=cell_array[k]) - weight_1 = weight_1_arr[k] - bias = bias_arr[k] - - nn = paddle.concat([input, pre_hidden], 1) - gate_input = paddle.matmul(x=nn, y=weight_1) - - gate_input = paddle.add(gate_input, bias) - i = paddle.slice( - gate_input, axes=[1], starts=[0], ends=[hidden_size] - ) - j = paddle.slice( - gate_input, - axes=[1], - starts=[hidden_size], - ends=[hidden_size * 2], - ) - f = paddle.slice( - gate_input, - axes=[1], - starts=[hidden_size * 2], - ends=[hidden_size * 3], - ) - o = paddle.slice( - gate_input, - axes=[1], - starts=[hidden_size * 3], - ends=[hidden_size * 4], - ) - - c = pre_cell * paddle.nn.functional.sigmoid( - f - ) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j) - m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o) - - rnn.update_memory(pre_hidden, m) - rnn.update_memory(pre_cell, c) - - rnn.step_output(m) - rnn.step_output(c) - - input = m - - if dropout is not None and dropout > 0.0: - input = paddle.nn.functional.dropout( - input, - 
p=dropout, - mode='upscale_in_train', - ) - - rnn.step_output(input) - rnnout = rnn() - - last_hidden_array = [] - last_cell_array = [] - real_res = rnnout[-1] - for i in range(num_layers): - m = rnnout[i * 2] - c = rnnout[i * 2 + 1] - m.stop_gradient = True - c.stop_gradient = True - last_h = paddle.slice( - m, axes=[0], starts=[num_steps - 1], ends=[num_steps] - ) - last_hidden_array.append(last_h) - last_c = paddle.slice( - c, axes=[0], starts=[num_steps - 1], ends=[num_steps] - ) - last_cell_array.append(last_c) - real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) - last_hidden = paddle.concat(last_hidden_array, 0) - last_cell = paddle.concat(last_cell_array, 0) - - return real_res, last_hidden, last_cell - def encoder_static( input_embedding, len=3, init_hidden=None, init_cell=None ): @@ -381,14 +262,7 @@ def lm_model( mode='upscale_in_train', ) - if rnn_model == "padding": - rnn_out, last_hidden, last_cell = padding_rnn( - x_emb, - len=num_steps, - init_hidden=init_hidden_reshape, - init_cell=init_cell_reshape, - ) - elif rnn_model == "static": + if rnn_model == "static": rnn_out, last_hidden, last_cell = encoder_static( x_emb, len=num_steps, @@ -622,36 +496,6 @@ class PaddingRNNTestBase(unittest.TestCase): ppl = np.append(ppl, train_ppl) return ppl - def compare_padding_static_mode(self, use_program_cache=True): - ''' - Test that train ppl of padding mode is same to that of static graph mode - ''' - config = RNNConfig('test', 'padding') - with fluid.scope_guard(fluid.Scope()): - padding_rnn_ppl = self.train(config, use_program_cache) - config = RNNConfig('test', 'static') - with fluid.scope_guard(fluid.Scope()): - static_rnn_ppl = self.train(config, use_program_cache) - np.testing.assert_allclose(padding_rnn_ppl, static_rnn_ppl, rtol=0.001) - - -class EagerDeletionPaddingRNNTest(PaddingRNNTestBase): - def test_padding_mode_no_eager_deletion(self): - ''' - Test that train ppl of padding mode is same to that of static graph mode without eager deletion - ''' - fluid.core._set_eager_deletion_mode(-1.0, 1.0, True) - # When parallel is True, use_program_cache does not make a difference. - self.compare_padding_static_mode(use_program_cache=True) - - def test_padding_mode_eager_deletion(self): - ''' - Test that train ppl of padding mode is same to that of static graph mode under eager deletion - ''' - fluid.core._set_eager_deletion_mode(0.0, 1.0, True) - # When parallel is True, use_program_cache does not make a difference. 
- self.compare_padding_static_mode(use_program_cache=True) - if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_eager_deletion_recurrent_op.py b/test/legacy_test/test_eager_deletion_recurrent_op.py index c01b05d1e462f041f00f27a17f87b50feadda114..7f98c7dbe387f86222efd98937f5f8e0f7ab0ab7 100644 --- a/test/legacy_test/test_eager_deletion_recurrent_op.py +++ b/test/legacy_test/test_eager_deletion_recurrent_op.py @@ -19,10 +19,6 @@ import numpy as np import paddle from paddle import fluid -from paddle.fluid import ParamAttr, core, layers -from paddle.fluid.backward import append_backward -from paddle.fluid.executor import Executor -from paddle.fluid.framework import Program, grad_var_name paddle.enable_static() @@ -31,646 +27,6 @@ os.environ["CPU_NUM"] = "1" fluid.core._set_eager_deletion_mode(0.0, 1.0, True) -class PyRNNBase: - def __init__(self, input_shape, output_shape): - self.x = np.ones(shape=input_shape).astype("float32") - self.y = np.zeros(shape=output_shape).astype("float32") - - def step(self, step_id, x): - raise NotImplementedError - - def forward(self): - for step_id in range(self.x.shape[0]): - self.step(step_id, self.x[step_id]) - return np.mean(self.y) - - def segment_inputs(self): - return [self.x[i] for i in range(self.x.shape[0])] - - -class PySimpleRNN1(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot = np.random.normal(size=(batch_size, input_dim)).astype( - "float32" - ) - - self.scale = 1.0 / 2.0 - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = self.h_boot - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = (pre_mem + x) * self.scale - self.y[step_id] = self.mems[step_id] - - -class PySimpleRNN2(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.W = np.ones(shape=(input_dim, input_dim)).astype("float32") - self.U = np.zeros(shape=(input_dim, input_dim)).astype("float32") - self.h_boot = np.ones(shape=(batch_size, input_dim)).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id > 0: - pre_mem = self.mems[step_id - 1] - else: - pre_mem = self.h_boot - xW = np.matmul(x, self.W).astype("float32") - hU = np.matmul(pre_mem, self.U).astype("float32") - - def py_sigmoid(x): - return 1.0 / (1.0 + np.exp(-x)) - - self.mems[step_id] = py_sigmoid(xW + hU) - self.y[step_id] = self.mems[step_id] - - -def create_tensor(np_data, place): - tensor = core.LoDTensor() - tensor.set(np_data, place) - return tensor - - -class EagerDeletionRecurrentOpTest1(unittest.TestCase): - ''' - Test RNNOp - equation: - h_t = ( x_t + h_{t-1} ) / scale - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 1 - sent_len = 1 - - def setup_program(self): - self.main_program = Program() - self.startup_program = Program() - self.place = core.CPUPlace() - - def setUp(self): - self.setup_program() - self.data_field = {"x", "h_boot"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, 
self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - h = paddle.scale( - x=paddle.add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def forward(self): - gc_vars = core._get_eager_deletion_vars( - self.main_program.desc, [self.output.name] - ) - self.assertEqual(len(gc_vars), self.main_program.num_blocks) - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, feed=self.feed_map, fetch_list=[self.output] - ) - - return out[0] - - def backward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - fetch_list = [ - self.main_program.global_block().var(grad_var_name(x)) - for x in self.data_field - ] - - gc_vars = core._get_eager_deletion_vars( - self.main_program.desc, [var.name for var in fetch_list] - ) - self.assertEqual(len(gc_vars), self.main_program.num_blocks) - - exe = Executor(self.place) - return exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=fetch_list, - return_numpy=False, - ) - - def test_backward(self, rtol=0.01): - self.check_forward() - num_grad = self.get_numerical_gradient() - - with fluid.program_guard(self.main_program, self.startup_program): - append_backward(self.output) - - ana_grad = [np.array(x) for x in self.backward()] - - for idx, name in enumerate(self.data_field): - self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) - np.testing.assert_allclose( - num_grad[idx], - ana_grad[idx], - rtol=rtol, - err_msg='num_grad (' - + name - + ') has diff at ' - + str(self.place) - + '\nExpect ' - + str(num_grad[idx]) - + '\n' - + 'But Got' - + str(ana_grad[idx]) - + ' in class ' - + self.__class__.__name__, - ) - - def check_forward(self): - pd_output = self.forward() - py_output = self.py_rnn.forward() - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - - def get_numerical_gradient(self, delta=0.005): - dloss_dout = 1.0 - feed_list = [getattr(self.py_rnn, x) for x in self.data_field] - grad_list = [np.zeros_like(x) for x in feed_list] - for feed, grad in zip(feed_list, grad_list): - for f, g in np.nditer([feed, grad], op_flags=['readwrite']): - o = float(f) - f[...] = o + delta - y_pos = self.forward() - - f[...] = o - delta - y_neg = self.forward() - - f[...] = o - dout_dfeed = (y_pos - y_neg) / (delta * 2) - g[...] 
= dout_dfeed - - return grad_list - - -class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): - r''' - Test RNNOp - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 10 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x", "h_boot", "W", "U"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - temp_l = paddle.static.nn.fc( - x=x_t, - size=self.input_dim, - weight_attr=ParamAttr( - name='W', - initializer=paddle.nn.initializer.Constant(1.0), - ), - bias_attr=False, - ) - temp_r = paddle.static.nn.fc( - x=h_pre, - size=self.input_dim, - weight_attr=ParamAttr( - name='U', - initializer=paddle.nn.initializer.Constant(0.0), - ), - bias_attr=False, - ) - - h = paddle.nn.functional.sigmoid(x=paddle.add(x=temp_l, y=temp_r)) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def test_backward(self): - super().test_backward(rtol=0.01) - - -class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp with two memories - equation: - h_1 = h_pre_1 - h_2 = h_pre_2 - y = h_1 + h_2 - vars: - - x - memories: - - h_1, h_2 - outputs: - - y - ''' - - class PySimpleRNN3(PyRNNBase): - def __init__(self, input_shape, output_shape): - super( - EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3, self - ).__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot1 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - self.h_boot2 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems1 = np.zeros(shape=men_dim).astype("float32") - self.mems2 = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem1 = self.h_boot1 - pre_mem2 = self.h_boot2 - else: - pre_mem1 = self.mems1[step_id - 1] - pre_mem2 = self.mems2[step_id - 1] - self.mems1[step_id] = pre_mem1 - self.mems2[step_id] = pre_mem2 - self.y[step_id] = self.mems1[step_id] + self.mems2[step_id] + x - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x", "h_boot1", "h_boot2"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot1 = 
paddle.static.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot1', - ) - h_boot1.stop_gradient = False - h_boot2 = paddle.static.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot2', - ) - h_boot2.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre1 = rnn.memory(init=h_boot1) - h_pre2 = rnn.memory(init=h_boot2) - x_t = rnn.step_input(x) - - mem1 = paddle.scale(x=h_pre1, scale=1.0) - mem2 = paddle.scale(x=h_pre2, scale=1.0) - out = paddle.add_n([mem1, x_t, mem2]) - - rnn.update_memory(h_pre1, mem1) - rnn.update_memory(h_pre2, mem2) - rnn.output(out) - - return rnn() - - -class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp without memory boot - equation: - mem = x + mem_pre - y = mem - vars: - - x - memories: - - mem - outputs: - - y - ''' - - class PySimpleRNN4(PyRNNBase): - def __init__(self, input_shape, output_shape): - super( - EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4, self - ).__init__(input_shape, output_shape) - men_dim = input_shape - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = np.zeros_like(x) - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = pre_mem + x - self.y[step_id] = self.mems[step_id] - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) - x_t = rnn.step_input(x) - mem = paddle.add(x=mem_pre, y=x_t) - rnn.update_memory(mem_pre, mem) - rnn.output(mem) - - return rnn() - - -class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp with two recurrent ops - equation: - first_rnn: - mem_inside = x + mem_pre_inside - first_inside_out = mem_inside - second_rnn: - mem = x + reduce_sum(rnn_inside_out) - y = mem + mem_pre - vars: - - x - memories: - - mem_inside - - mem - outputs: - - y - ''' - - class PySimpleRNN5(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - self.mem_0 = np.zeros(shape=input_shape).astype("float32") - self.mem_1 = np.zeros(shape=input_shape).astype("float32") - self.rnn_0_output = np.zeros(shape=input_shape).astype("float32") - - def step(self, step_id, x): - # First Rnn - for step in range(self.x.shape[0]): - x_t = self.x[step] - pre_mem = ( - np.zeros_like(x_t) if step == 0 else self.mem_0[step - 1] - ) - self.mem_0[step] = x_t + pre_mem - self.rnn_0_output[step] = self.mem_0[step] - # Second RNN - pre_mem = ( - np.zeros_like(x) if step_id == 0 else self.mem_1[step_id - 1] - ) - self.mem_1[step_id] = x + np.sum(self.rnn_0_output) - self.y[step_id] = self.mem_1[step_id] + pre_mem - - input_dim = 1 - batch_size = 1 - sent_len = 1 - - def setUp(self): - self.setup_program() - - self.data_field = {"x"} - - self.input_shape = 
(self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionTwoRecurrentOpsTest.PySimpleRNN5( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - - rnn_0 = layers.StaticRNN() - with rnn_0.step(): - x_t = rnn_0.step_input(x) - mem_pre = rnn_0.memory(shape=[-1, self.input_dim], batch_ref=x) - mem = paddle.add(x=mem_pre, y=x_t) - rnn_0.update_memory(mem_pre, mem) - rnn_0.output(mem) - - rnn_1 = layers.StaticRNN() - with rnn_1.step(): - mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x) - x_t = rnn_1.step_input(x) - last_rnn_output = rnn_0() - last_rnn_sum = paddle.sum(last_rnn_output) - mem = paddle.add(x=x_t, y=last_rnn_sum) - y = paddle.add(x=mem_pre, y=mem) - rnn_1.update_memory(mem_pre, mem) - rnn_1.output(y) - return rnn_1() - - -class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( - EagerDeletionRecurrentOpTest1 -): - ''' - Test one forward only RNN and one backward RNN in one program - ''' - - def setUp(self): - self.setup_program() - self.data_field = {"x", "h_boot"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - forward_only_rnn = layers.StaticRNN() - with forward_only_rnn.step(): - h_pre = forward_only_rnn.memory(init=h_boot) - x_t = forward_only_rnn.step_input(x) - - h = paddle.scale( - x=paddle.add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - forward_only_rnn.update_memory(h_pre, h) - forward_only_rnn.output(h) - forward_only_output = forward_only_rnn() - forward_only_output.stop_gradient = True - self.forward_only_output = paddle.mean(forward_only_output) - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - h = paddle.scale( - x=paddle.add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - self.output = paddle.mean(rnn()) - - def forward_two_rnn(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=[self.forward_only_output, self.output], - ) - - return out[0], out[1] - - def check_forward(self): - forward_only_output, pd_output = self.forward_two_rnn() - py_output = self.py_rnn.forward() - self.assertEqual(forward_only_output.shape, py_output.shape) - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose(forward_only_output, py_output, rtol=0.01) - np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - - class RecurrentNet(paddle.nn.Layer): def __init__(self): super().__init__() diff --git a/test/legacy_test/test_executor_and_use_program_cache.py 
b/test/legacy_test/test_executor_and_use_program_cache.py index daf557be9e4d9767cc0c684d26e2dc2b4269d1ed..f4cc24e0604a551e729d4e94780063c0ae868c08 100644 --- a/test/legacy_test/test_executor_and_use_program_cache.py +++ b/test/legacy_test/test_executor_and_use_program_cache.py @@ -98,7 +98,7 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase): ) def test_inference_output(self): - for rnn_model in ["static", "padding"]: + for rnn_model in ["static"]: # Set parallel to False to use the default executor. self.train_and_save_inference_program( rnn_model=rnn_model, use_program_cache=True @@ -166,4 +166,5 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/test/legacy_test/test_recurrent_op.py b/test/legacy_test/test_recurrent_op.py deleted file mode 100644 index 8874d955c06abcd0d228ff0200dbc044c5d45d0e..0000000000000000000000000000000000000000 --- a/test/legacy_test/test_recurrent_op.py +++ /dev/null @@ -1,710 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -from paddle import fluid -from paddle.fluid import ParamAttr, core, layers -from paddle.fluid.backward import append_backward -from paddle.fluid.executor import Executor -from paddle.fluid.framework import Program, grad_var_name - -np.random.seed(123) - - -class PyRNNBase: - def __init__(self, input_shape, output_shape): - self.x = np.ones(shape=input_shape).astype("float32") - self.y = np.zeros(shape=output_shape).astype("float32") - - def step(self, step_id, x): - raise NotImplementedError - - def forward(self): - for step_id in range(self.x.shape[0]): - self.step(step_id, self.x[step_id]) - return np.mean(self.y) - - def segment_inputs(self): - return [self.x[i] for i in range(self.x.shape[0])] - - -class PySimpleRNN1(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot = np.random.normal(size=(batch_size, input_dim)).astype( - "float32" - ) - - self.scale = 1.0 / 2.0 - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = self.h_boot - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = (pre_mem + x) * self.scale - self.y[step_id] = self.mems[step_id] - - -class PySimpleRNN2(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.W = np.ones(shape=(input_dim, input_dim)).astype("float32") - self.U = np.zeros(shape=(input_dim, input_dim)).astype("float32") - self.h_boot = np.ones(shape=(batch_size, input_dim)).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id > 0: - pre_mem = 
self.mems[step_id - 1] - else: - pre_mem = self.h_boot - xW = np.matmul(x, self.W).astype("float32") - hU = np.matmul(pre_mem, self.U).astype("float32") - - def py_sigmoid(x): - return 1.0 / (1.0 + np.exp(-x)) - - self.mems[step_id] = py_sigmoid(xW + hU) - self.y[step_id] = self.mems[step_id] - - -def create_tensor(np_data, place): - tensor = core.LoDTensor() - tensor.set(np_data, place) - return tensor - - -class RecurrentOpTest1(unittest.TestCase): - ''' - Test RNNOp - equation: - h_t = ( x_t + h_{t-1} ) / scale - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 1 - sent_len = 1 - - def setup_program(self): - self.main_program = Program() - self.startup_program = Program() - self.place = core.CPUPlace() - - def setUp(self): - self.setup_program() - self.feed_data_field = {"x", "h_boot"} - self.grad_data_field = self.feed_data_field - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - h = paddle.scale( - x=paddle.add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def forward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.feed_data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, feed=self.feed_map, fetch_list=[self.output] - ) - - return out[0] - - def backward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.feed_data_field - } - fetch_list = [ - self.main_program.global_block().var(grad_var_name(x)) - for x in self.grad_data_field - ] - - exe = Executor(self.place) - return exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=fetch_list, - return_numpy=False, - ) - - def test_backward(self, rtol=0.01): - self.check_forward() - - with fluid.program_guard(self.main_program, self.startup_program): - append_backward(self.output) - - ana_grad = [np.array(x) for x in self.backward()] - - num_grad = self.get_numerical_gradient() - for idx, name in enumerate(self.grad_data_field): - self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) - np.testing.assert_allclose( - num_grad[idx], - ana_grad[idx], - rtol=rtol, - atol=1e-8, - err_msg='num_grad (' - + name - + ') has diff at ' - + str(self.place) - + '\nExpect ' - + str(num_grad[idx]) - + '\n' - + 'But Got' - + str(ana_grad[idx]) - + ' in class ' - + self.__class__.__name__, - ) - - def check_forward(self): - pd_output = self.forward() - py_output = self.py_rnn.forward() - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - - def get_numerical_gradient(self, delta=0.005): - dloss_dout = 1.0 - feed_list = [getattr(self.py_rnn, x) for x in self.grad_data_field] - grad_list = [np.zeros_like(x) for x in feed_list] - for feed, grad in zip(feed_list, grad_list): - 
for f, g in np.nditer([feed, grad], op_flags=['readwrite']): - o = float(f) - f[...] = o + delta - y_pos = self.forward() - - f[...] = o - delta - y_neg = self.forward() - - f[...] = o - dout_dfeed = (y_pos - y_neg) / (delta * 2) - g[...] = dout_dfeed - - return grad_list - - -class RecurrentOpTest2(RecurrentOpTest1): - r''' - Test RNNOp - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 10 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.feed_data_field = {"x", "h_boot", "W", "U"} - self.grad_data_field = self.feed_data_field - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - temp_l = paddle.static.nn.fc( - x=x_t, - size=self.input_dim, - weight_attr=ParamAttr( - name='W', - initializer=paddle.nn.initializer.Constant(1.0), - ), - bias_attr=False, - ) - temp_r = paddle.static.nn.fc( - x=h_pre, - size=self.input_dim, - weight_attr=ParamAttr( - name='U', - initializer=paddle.nn.initializer.Constant(0.0), - ), - bias_attr=False, - ) - - h = paddle.nn.functional.sigmoid(x=paddle.add(x=temp_l, y=temp_r)) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def test_backward(self): - super().test_backward(rtol=0.01) - - -class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): - ''' - Test RNNOp with two memories - equation: - h_1 = h_pre_1 - h_2 = h_pre_2 - y = h_1 + h_2 - vars: - - x - memories: - - h_1, h_2 - outputs: - - y - ''' - - class PySimpleRNN3(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot1 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - self.h_boot2 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems1 = np.zeros(shape=men_dim).astype("float32") - self.mems2 = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem1 = self.h_boot1 - pre_mem2 = self.h_boot2 - else: - pre_mem1 = self.mems1[step_id - 1] - pre_mem2 = self.mems2[step_id - 1] - self.mems1[step_id] = pre_mem1 - self.mems2[step_id] = pre_mem2 - self.y[step_id] = self.mems1[step_id] + self.mems2[step_id] + x - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.feed_data_field = {"x", "h_boot1", "h_boot2"} - self.grad_data_field = self.feed_data_field - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = 
paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - h_boot1 = paddle.static.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot1', - ) - h_boot1.stop_gradient = False - h_boot2 = paddle.static.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot2', - ) - h_boot2.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre1 = rnn.memory(init=h_boot1) - h_pre2 = rnn.memory(init=h_boot2) - x_t = rnn.step_input(x) - - mem1 = paddle.scale(x=h_pre1, scale=1.0) - mem2 = paddle.scale(x=h_pre2, scale=1.0) - out = paddle.add_n([mem1, x_t, mem2]) - - rnn.update_memory(h_pre1, mem1) - rnn.update_memory(h_pre2, mem2) - rnn.output(out) - - return rnn() - - -class RecurrentOpNoMemBootTest(RecurrentOpTest1): - ''' - Test RNNOp with two memories - equation: - mem = x + mem_pre - y = mem - vars: - - x - memories: - - mem - outputs: - - y - ''' - - class PySimpleRNN4(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - men_dim = input_shape - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = np.zeros_like(x) - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = pre_mem + x - self.y[step_id] = self.mems[step_id] - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.feed_data_field = {"x"} - self.grad_data_field = self.feed_data_field - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) - x_t = rnn.step_input(x) - mem = paddle.add(x=mem_pre, y=x_t) - rnn.update_memory(mem_pre, mem) - rnn.output(mem) - - return rnn() - - -class RecurrentOpSubBlockTest(RecurrentOpTest1): - r''' - Test RNNOp with subblock variable - equation: - y_ = emb * w1 - h_t = \concat([x, h_{t-1}]) - h_t = h_t * w2 - h_t = \\unsqueeze(h_t, 1) - h_t = \dot_attention(h_t, y_) - h_t = \squeeze(h_t, 1) - y = h_t - vars: - - x - - w1 - - w2 - memories: - - h - outputs: - - y - ''' - - class PySimpleRNN5(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.w1 = np.random.uniform( - -0.1, 0.1, size=(input_dim, input_dim) - ).astype("float32") - self.w2 = np.random.uniform( - -0.1, 0.1, size=(input_dim * 2, input_dim) - ).astype("float32") - - self.emb = np.random.uniform( - -0.1, 0.1, size=(seq_len, batch_size, input_dim) - ).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - self.oy = np.matmul(self.emb, self.w1) - - def step(self, step_id, x): - def dot_attention(query, memory): - attn = np.matmul(query, memory.transpose((0, 2, 1))) - weight = softmax(attn) - weight_memory 
= np.matmul(weight, memory) - return weight_memory, weight - - def softmax(x): - return np.exp(x) / sum(np.exp(x)) - - if step_id == 0: - pre_mem = np.zeros_like(x) - else: - pre_mem = self.mems[step_id - 1] - concat_in = np.concatenate([x, pre_mem], 1) - new_mem = np.matmul(concat_in, self.w2) - - new_mem = np.expand_dims(new_mem, 1) - new_mem, _ = dot_attention(new_mem, self.oy) - new_mem = np.squeeze(new_mem, 1) - - self.mems[step_id] = new_mem - self.y[step_id] = self.mems[step_id] - - input_dim = 2 - batch_size = 3 - sent_len = 3 - - def setUp(self): - self.setup_program() - - self.feed_data_field = {"x", "emb", "w1", "w2"} - self.grad_data_field = self.feed_data_field - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpSubBlockTest.PySimpleRNN5( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - rnn_out = self.create_rnn_op() - self.output = paddle.mean(rnn_out) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - ) - x.stop_gradient = False - - emb = paddle.static.data( - name='emb', - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - ) - emb.stop_gradient = False - - w1 = paddle.static.data( - shape=[self.input_dim, self.input_dim], - dtype='float32', - name='w1', - ) - w1.stop_gradient = False - w2 = paddle.static.data( - shape=[self.input_dim * 2, self.input_dim], - dtype='float32', - name='w2', - ) - w2.stop_gradient = False - - rnn = layers.StaticRNN() - - def dot_attention(query, memory): - attn = paddle.matmul(query, memory, transpose_y=True) - weight = paddle.nn.functional.softmax(attn) - weight_memory = paddle.matmul(weight, memory) - - return weight_memory, weight - - y = paddle.matmul(emb, w1) - with rnn.step(): - pre_h = rnn.memory( - shape=(self.sent_len, self.input_dim), - batch_ref=x, - init_value=0.0, - ) - step_in = rnn.step_input(x) - concat_in = paddle.concat([step_in, pre_h], 1) - new_h = paddle.matmul(concat_in, w2) - new_h = paddle.unsqueeze(new_h, [1]) - new_h, _ = dot_attention(new_h, y) - new_h = paddle.squeeze(new_h, [1]) - - rnn.update_memory(pre_h, new_h) - rnn.step_output(new_h) - - return rnn() - - -class RecurrentOpStopGradientTest(RecurrentOpTest1): - r""" - Test RNNOp with stop_gradient = True - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - memories: - - h - output: - - h - """ - - input_dim = 2 - batch_size = 10 - sent_len = 2 - - def setUp(self): - self.setup_program() - self.feed_data_field = {"x", "h_boot", "W", "U"} - self.grad_data_field = {"x", "W", "U"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = paddle.static.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype="float32", - name="x", - ) - x.stop_gradient = False - h_boot = paddle.static.data( - shape=[-1, self.input_dim], dtype="float32", name="h_boot" - ) - h_boot.stop_gradient = True - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) # init doesn't have gradient - x_t = rnn.step_input(x) - - 
temp_l = paddle.static.nn.fc( - x=x_t, - size=self.input_dim, - weight_attr=ParamAttr( - name="W", - initializer=paddle.nn.initializer.Constant(1.0), - ), - bias_attr=False, - ) - temp_r = paddle.static.nn.fc( - x=h_pre, - size=self.input_dim, - weight_attr=ParamAttr( - name="U", - initializer=paddle.nn.initializer.Constant(0.0), - ), - bias_attr=False, - ) - - h = paddle.nn.functional.sigmoid(x=paddle.add(temp_l, temp_r)) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_select_input_output_op.py b/test/legacy_test/test_select_input_output_op.py index 215adf238f0471be2bffadc77e08cf711089fe1f..99292cbe2f25e6879c46aa0314d46e1f9196c735 100644 --- a/test/legacy_test/test_select_input_output_op.py +++ b/test/legacy_test/test_select_input_output_op.py @@ -22,7 +22,7 @@ from paddle.fluid import core from paddle.fluid.backward import append_backward from paddle.fluid.executor import Executor from paddle.fluid.framework import Program, program_guard -from paddle.fluid.layers.control_flow import select_input, select_output +from paddle.static.nn.control_flow import select_input, select_output paddle.enable_static()
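Taken together, these changes move the public control-flow utilities (``While``, ``while_loop``, ``ConditionalBlock``, ``select_input``, ``select_output``, ``Switch``) from ``paddle.fluid.layers.control_flow`` to ``paddle.static.nn`` / ``paddle.static.nn.control_flow`` and drop the ``StaticRNN``/padding-RNN code paths and their tests. The sketch below shows the post-migration import path together with a trivial ``while_loop``; it is illustrative only, and the loop body is an assumption rather than code taken from this patch.

.. code-block:: python

    import paddle
    # After this patch, while_loop is imported from paddle.static.nn, as the
    # dygraph-to-static helpers above now do.
    from paddle.static.nn import while_loop

    paddle.enable_static()

    def cond(i, ten):
        # while_loop keeps iterating while this returns a True bool tensor
        return i < ten

    def body(i, ten):
        # must return loop variables with the same structure as loop_vars
        return [i + 1, ten]

    i = paddle.zeros(shape=[1], dtype='int64')
    ten = paddle.full(shape=[1], fill_value=10, dtype='int64')
    i, ten = while_loop(cond, body, [i, ten])

    exe = paddle.static.Executor(paddle.CPUPlace())
    print(exe.run(fetch_list=[i])[0])   # [10]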