diff --git a/python/paddle/fluid/dataloader/collate.py b/python/paddle/fluid/dataloader/collate.py
index 50b86ca41e53b7dafeea940746720662bf398d56..661a0de13cd517dfb4aead02d196efd6818b7c37 100644
--- a/python/paddle/fluid/dataloader/collate.py
+++ b/python/paddle/fluid/dataloader/collate.py
@@ -58,7 +58,7 @@ def default_collate_fn(batch):
         batch = np.stack(batch, axis=0)
         return batch
     elif isinstance(sample, (paddle.Tensor, core.eager.Tensor)):
-        return layers.stack(batch, axis=0)
+        return paddle.stack(batch, axis=0)
     elif isinstance(sample, numbers.Number):
         batch = np.array(batch)
         return batch
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 6ff8f22a719215b4929b9464c763aa73654361d9..494fd6d47a62cdbc15b8095d312daaa1e2e1b236 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -137,7 +137,6 @@ __all__ = [
     'brelu',
     'leaky_relu',
     'flatten',
-    'stack',
     'pad2d',
     'unique',
     'unique_with_counts',
@@ -10247,147 +10246,6 @@ def flatten(x, axis=1, name=None):
     return out
 
 
-def stack(x, axis=0, name=None):
-    """
-
-    This OP stacks all the inputs :code:`x` along axis.
-
-    .. code-block:: text
-
-        Case 1:
-
-          Input:
-            x[0].shape = [1, 2]
-            x[0].data = [ [1.0 , 2.0 ] ]
-            x[1].shape = [1, 2]
-            x[1].data = [ [3.0 , 4.0 ] ]
-            x[2].shape = [1, 2]
-            x[2].data = [ [5.0 , 6.0 ] ]
-
-          Attrs:
-            axis = 0
-
-          Output:
-            Out.dims = [3, 1, 2]
-            Out.data =[ [ [1.0, 2.0] ],
-                        [ [3.0, 4.0] ],
-                        [ [5.0, 6.0] ] ]
-
-
-        Case 2:
-
-
-          Input:
-            x[0].shape = [1, 2]
-            x[0].data = [ [1.0 , 2.0 ] ]
-            x[1].shape = [1, 2]
-            x[1].data = [ [3.0 , 4.0 ] ]
-            x[2].shape = [1, 2]
-            x[2].data = [ [5.0 , 6.0 ] ]
-
-
-          Attrs:
-            axis = 1 or axis = -2
-
-          Output:
-            Out.shape = [1, 3, 2]
-            Out.data =[ [ [1.0, 2.0]
-                          [3.0, 4.0]
-                          [5.0, 6.0] ] ]
-
-
-    Args:
-        x (list(Variable)|tuple(Variable)): Input :code:`x` can be a :code:`list` or :code:`tuple` of Tensors, the shapes of all these Tensors
-                                     must be the same. Supposing input is N dims
-                                     Tensors :math:`[d_0, d_1, ..., d_{n-1}]`, the output is N+1 dims
-                                     Tensor :math:`[d_0, d_1, d_{axis-1}, len(x), d_{axis}, ..., d_{n-1}]`.
-                                     Supported data types: float32, float64, int32, int64.
-        axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is ``[-(R+1), R+1)``,
-                              where ``R`` is the number of dimensions of the first input tensor ``x[0]``.
-                              If ``axis < 0``, ``axis = axis+R+1``. The default value of axis is 0.
-        name (str, optional): Please refer to :ref:`api_guide_Name`, Default None.
-
-
-    Returns:
-        Variable: The stacked Tensor, has same data type with input Tensors. Output dim is :math:`rank(x[0])+1`.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import paddle.fluid.layers as layers
-            # set batch size=None
-            x1 = fluid.data(name='x1', shape=[None, 1, 2], dtype='int32')
-            x2 = fluid.data(name='x2', shape=[None, 1, 2], dtype='int32')
-            # stack Tensor list
-            data = layers.stack([x1,x2]) # stack according to axis 0, data.shape=[2, None, 1, 2]
-
-            data = layers.stack([x1,x2], axis=1) # stack according to axis 1, data.shape=[None, 2, 1, 2]
-
-
-    """
-    axis = 0 if axis is None else axis
-
-    if in_dygraph_mode():
-        return _C_ops.stack(x, axis)
-
-    if _in_legacy_dygraph():
-        return _legacy_C_ops.stack(x, 'axis', axis)
-
-    if not isinstance(x, list) and not isinstance(x, tuple):
-        # NOTE:(zhiqiu) Only support Variable as input if the Variable is a LOD_TENSOR_ARRAY create by create_array, array_write, array_read, etc.
-        # In that case, Variable is array of tensors indeed.
-        if (
-            isinstance(x, Variable)
-            and x.desc.type() == core.VarDesc.VarType.LOD_TENSOR_ARRAY
-        ):
-            x = [x]
-        else:
-            raise TypeError(
-                "The type of '%s' in %s must be %s, but received %s"
-                % (
-                    'x',
-                    'stack',
-                    'list[Tensor], tuple[Tensor] or TensorArray',
-                    type(x),
-                )
-            )
-
-    helper = LayerHelper('stack', **locals())
-
-    out = helper.create_variable_for_type_inference(x[0].dtype)
-    if x[0].desc.type() == core.VarDesc.VarType.LOD_TENSOR_ARRAY:
-        assert len(x) == 1, (
-            "If the elements of 'x' in stack are Variable(LoDTensorArray), "
-            "number of the elements must be 1, but received %s." % len(x)
-        )
-        out_index = helper.create_variable_for_type_inference(dtype="int32")
-
-        for i in x:
-            check_variable_and_dtype(
-                i,
-                'x',
-                ['float16', 'float32', 'float64', 'int32', 'int64'],
-                'stack',
-            )
-
-        helper.append_op(
-            type='tensor_array_to_tensor',
-            inputs={'X': x[0]},
-            outputs={'Out': [out], 'OutIndex': [out_index]},
-            attrs={'axis': axis, 'use_stack': True},
-        )
-    else:
-        helper.append_op(
-            type='stack',
-            inputs={'X': x},
-            outputs={'Y': out},
-            attrs={'axis': axis},
-        )
-
-    return out
-
-
 @templatedoc(op_type="filter_by_instag")
 def filter_by_instag(ins, ins_tag, filter_tag, is_lod, out_val_if_empty=0):
     """
diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py
index fc0603e22736239b6710a2c39e9aa50d752c84f2..0104502a7ddbdc9ddbcf157d151d4f616e83612f 100644
--- a/python/paddle/fluid/layers/rnn.py
+++ b/python/paddle/fluid/layers/rnn.py
@@ -623,7 +623,7 @@ def _rnn_dynamic_graph(
         )
 
     final_outputs = map_structure(
-        lambda x: nn.stack(x.array, axis=time_step_index), outputs
+        lambda x: paddle.stack(x.array, axis=time_step_index), outputs
     )
 
     if is_reverse:
@@ -1167,7 +1167,7 @@ class BeamSearchDecoder(Decoder):
             ),
             [1, self.beam_size],
         )
-        topk_coordinates = nn.stack([batch_pos, indices], axis=2)
+        topk_coordinates = paddle.stack([batch_pos, indices], axis=2)
         topk_coordinates.stop_gradient = True
         return nn.gather_nd(x, topk_coordinates)
 
@@ -1546,7 +1546,9 @@ def _dynamic_decode_imperative(
         if max_step_num is not None and step_idx > max_step_num:
             break
 
-    final_outputs = map_structure(lambda x: nn.stack(x.array, axis=0), outputs)
+    final_outputs = map_structure(
+        lambda x: paddle.stack(x.array, axis=0), outputs
+    )
     final_states = states
 
     try:
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 79766ba09f7912a113af41f312a2f55fd7b4b079..0f6652fdd5d7c1665d691cdd6724f73a39772522 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -571,8 +571,9 @@ def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False):
         assert isinstance(
             input, list
         ), "The 'input' in tensor_array_to_tensor must be list"
-        from .nn import stack, concat
+        from .nn import concat
         from ..dygraph import to_variable
+        from paddle import stack
 
         op = stack if use_stack else concat
         res = op(input, axis=axis)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
index 76627c5a5ef0a805af483dcb7f909cbe32d8ab4e..e5b85be96b8ba5a011a293f5ed7fbcd731521bd0 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
@@ -279,7 +279,7 @@ class BertModelLayer(Layer):
         self_attn_mask = fluid.layers.scale(
             x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False
         )
-        n_head_self_attn_mask = fluid.layers.stack(
+        n_head_self_attn_mask = paddle.stack(
             x=[self_attn_mask] * self._n_head, axis=1
         )
         n_head_self_attn_mask.stop_gradient = True
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
index d46778e838fc1576dd11f47766cbad77b73f71e4..20aa0870086f490bbd1d5ccf30492be18317fd2e 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
@@ -196,7 +196,7 @@ class BaseModel(fluid.dygraph.Layer):
         return new_state
 
     def _gather(self, x, indices, batch_pos):
-        topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
+        topk_coordinates = paddle.stack([batch_pos, indices], axis=2)
         return fluid.layers.gather_nd(x, topk_coordinates)
 
     @declarative
@@ -288,7 +288,7 @@ class BaseModel(fluid.dygraph.Layer):
                     step_input = new_hidden
             dec_output.append(step_input)
 
-        dec_output = fluid.layers.stack(dec_output)
+        dec_output = paddle.stack(dec_output)
         dec_output = self.fc(self._transpose_batch_time(dec_output))
         loss = fluid.layers.softmax_with_cross_entropy(
             logits=dec_output, label=label, soft_label=False
@@ -498,8 +498,8 @@ class BaseModel(fluid.dygraph.Layer):
             predicted_ids.append(token_indices)
             parent_ids.append(beam_indices)
 
-        predicted_ids = fluid.layers.stack(predicted_ids)
-        parent_ids = fluid.layers.stack(parent_ids)
+        predicted_ids = paddle.stack(predicted_ids)
+        parent_ids = paddle.stack(parent_ids)
         predicted_ids = fluid.layers.gather_tree(predicted_ids, parent_ids)
         predicted_ids = self._transpose_batch_time(predicted_ids)
         return predicted_ids
@@ -680,7 +680,7 @@ class AttentionModel(fluid.dygraph.Layer):
         return new_state
 
     def _gather(self, x, indices, batch_pos):
-        topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
+        topk_coordinates = paddle.stack([batch_pos, indices], axis=2)
         return fluid.layers.gather_nd(x, topk_coordinates)
 
     def attention(self, query, enc_output, mask=None):
@@ -774,7 +774,7 @@ class AttentionModel(fluid.dygraph.Layer):
             enc_outputs.append(enc_step_input)
             enc_hidden, enc_cell = new_enc_hidden, new_enc_cell
 
-        enc_outputs = fluid.layers.stack(enc_outputs)
+        enc_outputs = paddle.stack(enc_outputs)
         enc_outputs = self._transpose_batch_time(enc_outputs)
 
         # train
@@ -815,7 +815,7 @@ class AttentionModel(fluid.dygraph.Layer):
             dec_output.append(out)
             dec_hidden, dec_cell = new_dec_hidden, new_dec_cell
 
-        dec_output = fluid.layers.stack(dec_output)
+        dec_output = paddle.stack(dec_output)
         dec_output = self.fc(self._transpose_batch_time(dec_output))
         loss = fluid.layers.softmax_with_cross_entropy(
             logits=dec_output, label=label, soft_label=False
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py
index 4f6984c5e05fc3d3c6bbd988c6b20b131a87eb87..f2914614e603e17e7428f9c50a134a46fad1b886 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py
@@ -121,7 +121,7 @@ def test_list_append_in_while_loop_with_stack(x, iter_num):
     while i < iter_num.numpy()[0]:
         a.append(x)
         i += 1
-    out = fluid.layers.stack(a, axis=1)
+    out = paddle.stack(a, axis=1)
     return out
 
 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
index 69839cf72b5c7cc879068d73e3f88db80b3fee2c..a2c6b4c225dcdc11ec8fcb8de5cdd08838a07bea 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
@@ -762,7 +762,7 @@ class Transformer(Layer):
             return probs
 
         def gather(input, indices, batch_pos):
-            topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
+            topk_coordinates = paddle.stack([batch_pos, indices], axis=2)
             return layers.gather_nd(input, topk_coordinates)
 
         # run encoder
@@ -876,8 +876,8 @@ class Transformer(Layer):
             if layers.reduce_all(finished).numpy():
                 break
 
-        predict_ids = layers.stack(predict_ids, axis=0)
-        parent_ids = layers.stack(parent_ids, axis=0)
+        predict_ids = paddle.stack(predict_ids, axis=0)
+        parent_ids = paddle.stack(parent_ids, axis=0)
         finished_seq = layers.transpose(
             layers.gather_tree(predict_ids, parent_ids), [1, 2, 0]
         )
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py
index 1f1fbf6d789ae77a7561ec6ec0cd0ff3fb7d651c..e2e5405ff0266e414543c8cbc91ec97f439a4910 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py
@@ -62,7 +62,7 @@ class TestBase(IPUOpTest):
         z = paddle.static.data(
             name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32'
         )
-        out = paddle.fluid.layers.stack([x, y, z], **self.attrs)
+        out = paddle.stack([x, y, z], **self.attrs)
         self.fetch_list = [out.name]
 
     def run_model(self, exec_mode):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py
index 8a9e8879b8226f00637a7774d92c3cefd190fb9e..e2509e12b270538f108b0de45ccb8aeb7441ac1f 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py
@@ -144,7 +144,7 @@ class TestStackAPIWithLoDTensorArray(unittest.TestCase):
             for i in range(self.iter_num):
                 fluid.layers.array_write(input, zero + i, tensor_array)
 
-            self.out_var = fluid.layers.stack(tensor_array, axis=self.axis)
+            self.out_var = paddle.stack(tensor_array, axis=self.axis)
 
     def test_case(self):
         self.assertTrue(self.out_var.shape[self.axis] == -1)
diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py
index 3922f6a8c229d9a13f177195715d77a0ed467545..dd19157c32169f98602b22ff5d1a568da059198b 100644
--- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py
@@ -16,6 +16,7 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import unittest
+import paddle
 
 
 def build_and_run_program(place, batch_size, beam_size, stop_gradient=False):
@@ -45,7 +46,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False):
             layers.unsqueeze(layers.range(0, bs, 1, dtype=bs.dtype), [1]),
             [1, beam_size],
         )
-        topk_coordinates = layers.stack([batch_pos, indices], axis=2)
+        topk_coordinates = paddle.stack([batch_pos, indices], axis=2)
         topk_coordinates.stop_gradient = stop_gradient
         score = layers.gather_nd(x, topk_coordinates)
         layers.increment(x=step_idx, value=1.0, in_place=True)
diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py
index 0f71ebeac5b216d5e9950a82c5fe3e9cc807737e..15947f40f03decbcc6c9e5c6104c607d75bc9ede 100644
--- a/python/paddle/fluid/tests/unittests/test_stack_op.py
+++ b/python/paddle/fluid/tests/unittests/test_stack_op.py
@@ -172,7 +172,7 @@ class TestStackAPIWithLoDTensorArray(unittest.TestCase):
             for i in range(self.iter_num):
                 fluid.layers.array_write(input, zero + i, tensor_array)
 
-            self.out_var = fluid.layers.stack(tensor_array, axis=self.axis)
+            self.out_var = paddle.stack(tensor_array, axis=self.axis)
 
     def test_case(self):
         self.assertTrue(self.out_var.shape[self.axis] == -1)