From 33d90ae8eb344bed6734a9ee088616a515f260bc Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Fri, 2 Dec 2022 12:59:26 +0800 Subject: [PATCH] move paddle.fluid.layers.tensor.create_parameter to paddle.tensor.creation.create_parameter (#48579) --- python/paddle/__init__.py | 2 +- .../contrib/slim/quantization/adaround.py | 3 +- .../slim/tests/test_quantization_pass.py | 2 +- python/paddle/fluid/contrib/sparsity/asp.py | 2 +- python/paddle/fluid/io.py | 20 ++--- python/paddle/fluid/layers/tensor.py | 83 ------------------ .../auto_parallel/test_dist_op_cost.py | 18 ++-- .../unittests/ipu/test_weight_decay_ipu.py | 2 +- ...n_reshape_transpose_matmul_v2_fuse_pass.py | 2 +- .../test_auto_parallel_reshard_dpmppp.py | 4 +- .../test_auto_parallel_reshard_mppp.py | 4 +- .../tests/unittests/test_calc_gradient.py | 8 +- .../fluid/tests/unittests/test_cholesky_op.py | 2 +- .../tests/unittests/test_create_parameter.py | 8 +- .../test_eager_deletion_padding_rnn.py | 12 +-- ...test_imperative_container_parameterlist.py | 6 +- .../test_imperative_load_static_param.py | 4 +- .../fluid/tests/unittests/test_mul_nn_grad.py | 8 +- .../fluid/tests/unittests/test_nn_grad.py | 4 +- .../tests/unittests/test_norm_nn_grad.py | 13 ++- .../tests/unittests/test_optimizer_grad.py | 6 +- python/paddle/framework/__init__.py | 1 - python/paddle/static/__init__.py | 4 +- python/paddle/static/nn/__init__.py | 2 +- python/paddle/tensor/creation.py | 85 ++++++++++++++++++- 25 files changed, 150 insertions(+), 155 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 95e179bb97..8e03c3b907 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -93,6 +93,7 @@ from .tensor.attribute import shape # noqa: F401 from .tensor.attribute import real # noqa: F401 from .tensor.attribute import imag # noqa: F401 from .tensor.attribute import is_floating_point # noqa: F401 +from .tensor.creation import create_parameter # noqa: F401 from .tensor.creation import to_tensor # noqa: F401 from .tensor.creation import diag # noqa: F401 from .tensor.creation import diagflat # noqa: F401 @@ -326,7 +327,6 @@ from .framework.random import seed # noqa: F401 from .framework.random import get_cuda_rng_state # noqa: F401 from .framework.random import set_cuda_rng_state # noqa: F401 from .framework import ParamAttr # noqa: F401 -from .framework import create_parameter # noqa: F401 from .framework import CPUPlace # noqa: F401 from .framework import IPUPlace # noqa: F401 from .framework import CUDAPlace # noqa: F401 diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index d6aff8d41c..b024c0d773 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -20,7 +20,6 @@ import paddle import paddle import paddle.fluid as fluid -import paddle from ....log_helper import get_logger from .utils import ( @@ -148,7 +147,7 @@ class AdaRound: tensor_floor = np.floor(tensor_scale) tensor = tensor_scale - tensor_floor alpha = -np.log((ZETA - GAMMA) / (tensor - GAMMA) - 1) - self.alpha_v = fluid.layers.create_parameter( + self.alpha_v = paddle.create_parameter( shape=alpha.shape, dtype="float32", name=var_name + ".alpha", diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index be42ab5cf2..f64a047ea4 100644 --- 
a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -73,7 +73,7 @@ def residual_block(num, quant_skip_pattern=None): conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') - matmul_weight = fluid.layers.create_parameter( + matmul_weight = paddle.create_parameter( shape=[1, 16, 32, 32], dtype='float32' ) hidden = fluid.layers.matmul(hidden, matmul_weight, True, True) diff --git a/python/paddle/fluid/contrib/sparsity/asp.py b/python/paddle/fluid/contrib/sparsity/asp.py index d2165def6f..fda4170537 100644 --- a/python/paddle/fluid/contrib/sparsity/asp.py +++ b/python/paddle/fluid/contrib/sparsity/asp.py @@ -881,7 +881,7 @@ class ASPHelper: for param in params: if ASPHelper._is_supported_layer(main_program, param.name): if param.name not in asp_info.mask_vars: - mask_param = layers.create_parameter( + mask_param = paddle.create_parameter( name=ASPHelper._get_mask_name(param.name), shape=param.shape, dtype=param.dtype, diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index a308d5e261..10a91a15f8 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -214,8 +214,8 @@ def get_program_parameter(program): paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') list_para = fluid.io.get_program_parameter( fluid.default_main_program() ) """ return list(filter(is_parameter, program.list_vars())) @@ -240,8 +240,8 @@ def get_program_persistable_vars(program): paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') list_para = fluid.io.get_program_persistable_vars( fluid.default_main_program() ) """ return list(filter(is_persistable, program.list_vars())) @@ -356,8 +356,8 @@ def save_vars( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() @@ -825,8 +825,8 @@ def load_vars( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b 
= paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() @@ -1590,8 +1590,8 @@ def load_inference_model( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32') - b = fluid.layers.create_parameter(shape=[200], dtype='float32') + w = paddle.create_parameter(shape=[784, 200], dtype='float32') + b = paddle.create_parameter(shape=[200], dtype='float32') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 1066efabf1..4c00061ae7 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -48,7 +48,6 @@ from paddle import _C_ops, _legacy_C_ops __all__ = [ 'create_tensor', - 'create_parameter', 'create_global_var', 'cast', 'tensor_array_to_tensor', @@ -108,88 +107,6 @@ def create_tensor(dtype, name=None, persistable=False): ) -def create_parameter( - shape, dtype, name=None, attr=None, is_bias=False, default_initializer=None -): - """ - :api_attr: Static Graph - - This function creates a parameter. The parameter is a learnable variable, which can have - gradient, and can be optimized. - - NOTE: this is a very low-level API. This API is useful when you create - operator by your self. instead of using layers. - - Parameters: - shape (list of int): Shape of the parameter - dtype (str): Data type of the parameter - name (str, optional): For detailed information, please refer to - :ref:`api_guide_Name` . Usually name is no need to set and None by default. - attr (ParamAttr, optional): Attributes of the parameter - is_bias (bool, optional): This can affect which default initializer is chosen - when default_initializer is None. If is_bias, - initializer.Constant(0.0) will be used. Otherwise, - Xavier() will be used. - default_initializer (Initializer, optional): Initializer for the parameter - - Returns: - The created parameter. - - Examples: - .. 
code-block:: python - - import paddle - paddle.enable_static() - W = paddle.static.create_parameter(shape=[784, 200], dtype='float32') - """ - check_type(shape, 'shape', (list, tuple, numpy.ndarray), 'create_parameter') - for item in shape: - check_type( - item, - 'item of shape', - ( - int, - numpy.uint8, - numpy.int8, - numpy.int16, - numpy.int32, - numpy.int64, - ), - 'create_parameter', - ) - - check_dtype( - dtype, - 'dtype', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int8', - 'int16', - 'int32', - 'int64', - 'uint8', - ], - 'create_parameter', - ) - check_type(attr, 'attr', (type(None), ParamAttr), 'create_parameter') - check_type( - default_initializer, - 'default_initializer', - (type(None), Initializer), - 'create_parameter', - ) - - helper = LayerHelper("create_parameter", **locals()) - if attr is None: - attr = ParamAttr(name=name) - return helper.create_parameter( - attr, shape, convert_dtype(dtype), is_bias, default_initializer - ) - - def create_global_var( shape, value, dtype, persistable=False, force_cpu=False, name=None ): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index f3956cda20..163309f3a3 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -152,7 +152,7 @@ class TestDistOpCost(unittest.TestCase): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -160,7 +160,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] [-1, 0] auto.shard_tensor( @@ -171,7 +171,7 @@ class TestDistOpCost(unittest.TestCase): out1 = paddle.fluid.layers.matmul( out, param1 ) # [8, 8] [-1, -1] - tmp_param = paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( @@ -263,7 +263,7 @@ class TestDistOpCost(unittest.TestCase): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul_v2 - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -271,7 +271,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] [-1, 0] auto.shard_tensor( @@ -280,7 +280,7 @@ class TestDistOpCost(unittest.TestCase): [None, "x"], ) out1 = paddle.matmul(out, param1) # [8, 8] [-1, -1] - tmp_param = paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( @@ -370,7 +370,7 @@ class TestDistOpCost(unittest.TestCase): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # mul - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -378,7 +378,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] 
[-1, 0] auto.shard_tensor( @@ -388,7 +388,7 @@ class TestDistOpCost(unittest.TestCase): ) out1 = paddle.fluid.layers.mul(out, param1) # [8, 8] [-1, -1] - tmp_param = paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( diff --git a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py index 7d63c06d47..08bee7c90d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py @@ -72,7 +72,7 @@ class TestBase(IPUOpTest): image = paddle.static.data( name='image', shape=[1, 3, 10, 10], dtype='float32' ) - bias = paddle.fluid.layers.create_parameter( + bias = paddle.create_parameter( shape=[1, 3, 10, 10], is_bias=True, dtype='float32' ) add1 = image + bias diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py index f6f8bcc17c..ad2f5777f2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py @@ -32,7 +32,7 @@ class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest): data = fluid.data( name="data", shape=self.data_shape, dtype="float32" ) - weight = fluid.layers.create_parameter( + weight = paddle.create_parameter( shape=self.weight_shape, dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index 8fc9003379..b9320eee82 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -69,9 +69,7 @@ class MLPLayer(nn.Layer): out = self.linear0(out) out = F.gelu(out, approximate=True) out = self.linear1(out) - param = paddle.fluid.layers.create_parameter( - [1024, 4096], paddle.float32 - ) + param = paddle.create_parameter([1024, 4096], paddle.float32) auto.shard_tensor(param, PP_MESH_1, [None, "y"]) out = paddle.fluid.layers.mul(out, param) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index e3c284bd56..84309aeb8a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -79,9 +79,7 @@ class MLPLayer(nn.Layer): auto.shard_tensor(self.linear2.weight, PP_MESH_1, ["x", None]) w_out = self.word_embeddings(input) out = self.linear0(w_out) - param = paddle.fluid.layers.create_parameter( - [4096, 4096], paddle.float32 - ) + param = paddle.create_parameter([4096, 4096], paddle.float32) auto.shard_tensor(param, PP_MESH_0, ["x", None]) out = paddle.fluid.layers.mul(out, param) gelu_out = F.gelu(out, approximate=True) diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 58c4ee6083..c6dcbc0cb7 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -27,8 +27,8 @@ class TestCalcGradient(unittest.TestCase): main = 
fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - x = layers.create_parameter(dtype="float32", shape=[5, 10]) - y = layers.create_parameter(dtype="float32", shape=[10, 8]) + x = paddle.create_parameter(dtype="float32", shape=[5, 10]) + y = paddle.create_parameter(dtype="float32", shape=[10, 8]) mul_out = layers.mul(x=x, y=y) mean_out = paddle.mean(mul_out) a = calc_gradient(mean_out, mul_out) @@ -45,7 +45,7 @@ class TestDoubleGrad(unittest.TestCase): startup = fluid.Program() with fluid.program_guard(main, startup): net = lambda x: x * x - x = fluid.layers.create_parameter( + x = paddle.create_parameter( name='x', shape=[1], dtype='float32', @@ -66,7 +66,7 @@ class TestDoubleGrad(unittest.TestCase): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - x = fluid.layers.create_parameter( + x = paddle.create_parameter( name='x', shape=[1], dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index ffeb18b0ff..20ab3e73ab 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -73,7 +73,7 @@ class TestCholeskyOp(OpTest): root_data = self.root_data[..., :3, :3] prog = fluid.Program() with fluid.program_guard(prog): - root = layers.create_parameter( + root = paddle.create_parameter( dtype=root_data.dtype, shape=root_data.shape ) root_t = paddle.transpose(root, self.trans_dims) diff --git a/python/paddle/fluid/tests/unittests/test_create_parameter.py b/python/paddle/fluid/tests/unittests/test_create_parameter.py index 75231c4047..e5bad11773 100644 --- a/python/paddle/fluid/tests/unittests/test_create_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_create_parameter.py @@ -27,24 +27,24 @@ class TestCreateParameterError(unittest.TestCase): with program_guard(Program(), Program()): def test_shape(): - fluid.layers.create_parameter(1, np.float32) + paddle.create_parameter(1, np.float32) self.assertRaises(TypeError, test_shape) def test_shape_item(): - fluid.layers.create_parameter([1.0, 2.0, 3.0], "float32") + paddle.create_parameter([1.0, 2.0, 3.0], "float32") self.assertRaises(TypeError, test_shape_item) def test_attr(): - fluid.layers.create_parameter( + paddle.create_parameter( [1, 2, 3], np.float32, attr=np.array([i for i in range(6)]) ) self.assertRaises(TypeError, test_attr) def test_default_initializer(): - fluid.layers.create_parameter( + paddle.create_parameter( [1, 2, 3], np.float32, default_initializer=np.array([i for i in range(6)]), diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 8ba799e84b..921e4a4e43 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -127,7 +127,7 @@ def lm_model( cell_array = [] mask_array = [] for i in range(num_layers): - weight_1 = layers.create_parameter( + weight_1 = paddle.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), @@ -136,7 +136,7 @@ def lm_model( ), ) weight_1_arr.append(weight_1) - bias_1 = layers.create_parameter( + bias_1 = paddle.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), @@ -248,7 +248,7 @@ def lm_model( cell_array = [] mask_array = [] for i in range(num_layers): - weight_1 = 
layers.create_parameter( + weight_1 = paddle.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), @@ -257,7 +257,7 @@ def lm_model( ), ) weight_1_arr.append(weight_1) - bias_1 = layers.create_parameter( + bias_1 = paddle.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), @@ -442,7 +442,7 @@ def lm_model( rnn_out = paddle.reshape(rnn_out, shape=[-1, num_steps, hidden_size]) - softmax_weight = layers.create_parameter( + softmax_weight = paddle.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", @@ -450,7 +450,7 @@ def lm_model( low=-init_scale, high=init_scale ), ) - softmax_bias = layers.create_parameter( + softmax_bias = paddle.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py index 92957890e3..763210361f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py @@ -30,7 +30,7 @@ class MyLayer(fluid.Layer): def paddle_imperative_ParameterList(self, num_stacked_param): return paddle.nn.ParameterList( - [fluid.layers.create_parameter(shape=[2, 2], dtype='float32')] + [paddle.create_parameter(shape=[2, 2], dtype='float32')] * num_stacked_param ) @@ -53,13 +53,13 @@ class TestImperativeContainerParameterList(unittest.TestCase): loss = paddle.mean(res) loss.backward() - model.params[num_stacked_param - 1] = fluid.layers.create_parameter( + model.params[num_stacked_param - 1] = paddle.create_parameter( shape=[2, 3], dtype='float32' ) res = model(x) self.assertListEqual(res.shape, [5, 3]) model.params.append( - fluid.layers.create_parameter(shape=[3, 4], dtype='float32') + paddle.create_parameter(shape=[3, 4], dtype='float32') ) self.assertEqual(len(model.params), num_stacked_param + 1) res = model(x) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 528ddac3ff..2d80a3a1ee 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -149,10 +149,10 @@ class TestDygraphLoadStatic(unittest.TestCase): nodes_vector, edge_set, 6, 1, 2 ) - para1 = fluid.layers.create_parameter( + para1 = paddle.create_parameter( [100, 100], 'float32', name="weight_test_1" ) - para2 = fluid.layers.create_parameter( + para2 = paddle.create_parameter( [20, 200], 'float32', name="weight_test_2" ) diff --git a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py index 099280161a..b6c3f03f97 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py @@ -31,8 +31,8 @@ class TestMulGradCheck(unittest.TestCase): def func(self, place): prog = fluid.Program() with fluid.program_guard(prog): - x = layers.create_parameter(dtype="float64", shape=[2, 8], name='x') - y = layers.create_parameter(dtype="float64", shape=[8, 4], name='y') + x = paddle.create_parameter(dtype="float64", shape=[2, 8], name='x') + y = paddle.create_parameter(dtype="float64", shape=[8, 4], name='y') z = layers.mul(x=x, y=y) gradient_checker.grad_check([x, y], z, place=place) @@ -88,10 +88,10 @@ 
class TestMatmulDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = layers.create_parameter( + x = paddle.create_parameter( dtype=typename, shape=self.x_shape, name='x' ) - y = layers.create_parameter( + y = paddle.create_parameter( dtype=typename, shape=self.y_shape, name='y' ) out = layers.matmul( diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index a4030d8adb..23de988e15 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -43,7 +43,7 @@ class TestSliceOpDoubleGradCheck(unittest.TestCase): self.ends = [3, 3, 6] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 4, 5, 2]).astype("float64") - self.inputs = layers.create_parameter( + self.inputs = paddle.create_parameter( dtype="float64", shape=[3, 4, 5, 2], name='x' ) @@ -61,7 +61,7 @@ class TestSliceOpDoubleGradCheckCase3(TestSliceOpDoubleGradCheck): self.ends = [3, 3, 3] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 3, 3]).astype("float64") - self.inputs = layers.create_parameter( + self.inputs = paddle.create_parameter( dtype="float64", shape=[3, 3, 3], name='x3' ) diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index 3142d63afc..bbcb5ef7b9 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestInstanceNormDoubleGradCheck(unittest.TestCase): @@ -34,7 +33,7 @@ class TestInstanceNormDoubleGradCheck(unittest.TestCase): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.static.nn.instance_norm(input=x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( @@ -62,7 +61,7 @@ class TestInstanceNormDoubleGradCheckWithoutParamBias( dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.static.nn.instance_norm( input=x, param_attr=False, bias_attr=False ) @@ -85,7 +84,7 @@ class TestInstanceNormDoubleGradEagerCheck(unittest.TestCase): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.nn.functional.instance_norm(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check for static mode @@ -127,7 +126,7 @@ class TestInstanceNormDoubleGradEagerCheckWithParams( dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.nn.InstanceNorm2D(3)(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check for static mode @@ -171,7 +170,7 @@ class TestBatchNormDoubleGradCheck(unittest.TestCase): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') z = fluid.layers.batch_norm( input=x, data_layout=self.data_layout, @@ -251,7 +250,7 @@ 
class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck): chn = ( self.shape[1] if self.data_layout == 'NCHW' else self.shape[-1] ) - x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') z = fluid.layers.batch_norm( input=x, data_layout=self.data_layout, diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index a9f7c5de8d..e20d563ebd 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -77,20 +77,20 @@ class SimpleNetWithCond: mean_out = mean(sum_all) optimizer.minimize(mean_out) """ - param_x = fluid.layers.create_parameter( + param_x = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_x"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.x), ) - param_y = fluid.layers.create_parameter( + param_y = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_y"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.y), ) - param_z = fluid.layers.create_parameter( + param_z = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_z"), diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 11250e32d3..30d6379368 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -22,7 +22,6 @@ from .framework import set_grad_enabled # noqa: F401 from .framework import is_grad_enabled # noqa: F401 from ..fluid.param_attr import ParamAttr # noqa: F401 -from ..fluid.layers.tensor import create_parameter # noqa: F401 from ..fluid.core import CPUPlace # noqa: F401 from ..fluid.core import IPUPlace # noqa: F401 from ..fluid.core import CUDAPlace # noqa: F401 diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 118fe0b58b..983138ce97 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -31,6 +31,9 @@ from .io import normalize_program # noqa: F401 from ..fluid import Scope # noqa: F401 from .input import data # noqa: F401 from .input import InputSpec # noqa: F401 + +from ..tensor.creation import create_parameter # noqa: F401 + from ..fluid.executor import Executor # noqa: F401 from ..fluid.executor import global_scope # noqa: F401 from ..fluid.executor import scope_guard # noqa: F401 @@ -67,7 +70,6 @@ from ..fluid.io import load_vars # noqa: F401 from ..fluid.io import save_vars # noqa: F401 from ..fluid.io import batch # noqa: F401 -from ..fluid.layers import create_parameter # noqa: F401 from ..fluid.layers import create_global_var # noqa: F401 from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401 from ..fluid.layers import exponential_decay # noqa: F401 diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 8e3048b21c..3d3cc5f8a2 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -22,12 +22,12 @@ from .common import conv2d_transpose # noqa: F401 from .common import conv3d_transpose # noqa: F401 from .common import py_func # noqa: F401 +from ...tensor.creation import create_parameter # noqa: F401 from ...fluid.layers import batch_norm # noqa: F401 from ...fluid.layers import 
bilinear_tensor_product # noqa: F401 from ...fluid.layers import case # noqa: F401 from ...fluid.layers import cond # noqa: F401 from ...fluid.layers import conv2d # noqa: F401 -from ...fluid.layers import create_parameter # noqa: F401 from ...fluid.layers import crf_decoding # noqa: F401 from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401 diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index e5005dbe16..c969ee3639 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -31,10 +31,14 @@ from ..fluid.data_feeder import ( convert_dtype, ) from ..fluid.framework import ( + Variable, _in_eager_without_dygraph_check, _in_legacy_dygraph, + device_guard, ) +from ..fluid.initializer import Initializer from ..fluid.layers import utils +from ..fluid.param_attr import ParamAttr from ..framework import ( LayerHelper, _current_expected_place, @@ -44,7 +48,6 @@ from ..framework import ( core, in_dygraph_mode, ) -from ..static import Variable, device_guard __all__ = [] @@ -67,6 +70,86 @@ def _real_to_complex_dtype(dtype): return dtype +def create_parameter( + shape, dtype, name=None, attr=None, is_bias=False, default_initializer=None +): + """ + This function creates a parameter. The parameter is a learnable variable, which can have + gradient, and can be optimized. + + Note: + This is a very low-level API. This API is useful when you create operator by your self, instead of using layers. + + Args: + shape (list of int): Shape of the parameter + dtype (str): Data type of the parameter + name (str, optional): For detailed information, please refer to + :ref:`api_guide_Name` . Usually name is no need to set and None by default. + attr (ParamAttr, optional): Attributes of the parameter + is_bias (bool, optional): This can affect which default initializer is chosen + when default_initializer is None. If is_bias, + initializer.Constant(0.0) will be used. Otherwise, + Xavier() will be used. + default_initializer (Initializer, optional): Initializer for the parameter + + Returns: + The created parameter. + + Examples: + .. code-block:: python + + import paddle + paddle.enable_static() + W = paddle.static.create_parameter(shape=[784, 200], dtype='float32') + """ + check_type(shape, 'shape', (list, tuple, np.ndarray), 'create_parameter') + for item in shape: + check_type( + item, + 'item of shape', + ( + int, + np.uint8, + np.int8, + np.int16, + np.int32, + np.int64, + ), + 'create_parameter', + ) + + check_dtype( + dtype, + 'dtype', + [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int8', + 'int16', + 'int32', + 'int64', + 'uint8', + ], + 'create_parameter', + ) + check_type(attr, 'attr', (type(None), ParamAttr), 'create_parameter') + check_type( + default_initializer, + 'default_initializer', + (type(None), Initializer), + 'create_parameter', + ) + + helper = LayerHelper("create_parameter", **locals()) + if attr is None: + attr = ParamAttr(name=name) + return helper.create_parameter( + attr, shape, convert_dtype(dtype), is_bias, default_initializer + ) + + def linspace(start, stop, num, dtype=None, name=None): r""" Return fixed number of evenly spaced values within a given interval. -- GitLab
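For call sites that still use fluid.layers.create_parameter, a minimal migration sketch under static graph mode (shapes and parameter names are illustrative, taken from the io.py examples in this patch; both spellings below are the re-exports this patch wires up):

    import paddle

    paddle.enable_static()

    # paddle.create_parameter and paddle.static.create_parameter now both
    # resolve to paddle.tensor.creation.create_parameter.
    w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
    b = paddle.static.create_parameter(
        shape=[200], dtype='float32', is_bias=True, name='fc_b'
    )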