From 81fb7df23d0321aa9b10516d9136dee23ea2ec36 Mon Sep 17 00:00:00 2001
From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com>
Date: Mon, 24 Apr 2023 16:02:08 +0800
Subject: [PATCH] [Zero-Dim] Support output 0D for to_tensor. (#52741)

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* test=allcase

* fix doc errors, test=allcase
---
 paddle/fluid/pybind/imperative.cc             |  2 +-
 python/paddle/audio/functional/window.py      |  9 ++-
 .../pp_utils/p2p_communication.py             |  8 +--
 .../unittests/parallel_dygraph_none_var.py    |  2 +-
 .../fluid/tests/unittests/test_cholesky_op.py |  2 +-
 .../fluid/tests/unittests/test_deg2rad.py     |  2 +-
 .../unittests/test_deprecated_decorator.py    |  2 +-
 .../fluid/tests/unittests/test_einsum_v2.py   |  2 +-
 .../fluid/tests/unittests/test_inplace.py     |  4 +-
 .../tests/unittests/test_jit_save_load.py     |  6 +-
 .../fluid/tests/unittests/test_lbfgs_v2.py    | 66 +++++++++----------
 .../fluid/tests/unittests/test_rad2deg.py     |  4 +-
 .../unittests/test_state_dict_convert.py      |  2 +-
 .../unittests/test_tensor_register_hook.py    |  6 +-
 .../fluid/tests/unittests/test_var_base.py    |  4 +-
 .../tests/unittests/test_zero_dim_tensor.py   | 27 ++++++++
 python/paddle/tensor/creation.py              | 24 +++----
 python/paddle/tensor/math.py                  | 40 +++++------
 test/auto_parallel/random_control_unittest.py |  2 +-
 .../test_distribution_bernoulli.py            | 36 +++++-----
 .../test_distribution_transform.py            | 14 ++--
 ...t_distribution_transformed_distribution.py |  2 +-
 test/distribution/test_kl.py                  |  4 +-
 .../test_cpu_cuda_to_tensor.py                |  4 +-
 test/dygraph_to_static/test_fallback.py       |  2 +-
 test/dygraph_to_static/test_to_tensor.py      | 12 ++++
 test/legacy_test/auto_parallel_gpt_model.py   |  2 +-
 test/legacy_test/test_audio_functions.py      |  6 +-
 test/quantization/imperative_test_utils.py    |  2 +-
 29 files changed, 169 insertions(+), 129 deletions(-)

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 44d4d070eaf..e60211286ed 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -1321,7 +1321,7 @@ void BindImperative(py::module *m_ptr) {
 
                 import paddle
 
-                x = paddle.to_tensor(1.0, stop_gradient=False)
+                x = paddle.to_tensor([1.0], stop_gradient=False)
                 detach_x = x.detach()
                 detach_x[:] = 10.0
                 print(x)  # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=False,
diff --git a/python/paddle/audio/functional/window.py b/python/paddle/audio/functional/window.py
index b2dd63973d3..eb84d6f1889 100644
--- a/python/paddle/audio/functional/window.py
+++ b/python/paddle/audio/functional/window.py
@@ -13,6 +13,8 @@
 import math
 from typing import List, Tuple, Union
 
+import numpy as np
+
 import paddle
 from paddle import Tensor
 
@@ -38,7 +40,12 @@ window_function_register = WindowFunctionRegister()
 
 @window_function_register.register()
 def _cat(x: List[Tensor], data_type: str) -> Tensor:
-    l = [paddle.to_tensor(_, data_type) for _ in x]
+    l = []
+    for t in x:
+        if np.isscalar(t) and not isinstance(t, str):
+            l.append(paddle.to_tensor([t], data_type))
+        else:
+            l.append(paddle.to_tensor(t, data_type))
     return paddle.concat(l)
 
 
diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
index 81385f9a0a4..9d3cb5bd170 100644
--- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
@@ -117,7 +117,7 @@ class SendRecvMeta:
 
     def _send_dims_shape_dtype(self, tensor, group):
         # send len(shape)
-        dims = paddle.to_tensor(len(tensor.shape))
+        dims = paddle.to_tensor([len(tensor.shape)])
         dst_rank = _hcg._get_p2p_next_rank()
 
         paddle.distributed.send(dims, dst=dst_rank, group=group)
@@ -127,11 +127,11 @@ class SendRecvMeta:
         paddle.distributed.send(shape, dst=dst_rank, group=group)
 
         # send dtype
-        dtype = paddle.to_tensor(paddle_2_number(tensor.dtype))
+        dtype = paddle.to_tensor([paddle_2_number(tensor.dtype)])
         paddle.distributed.send(dtype, dst=dst_rank, group=group)
 
         # send trainable
-        stop_grad = paddle.to_tensor(int(tensor.stop_gradient))
+        stop_grad = paddle.to_tensor([int(tensor.stop_gradient)])
         paddle.distributed.send(stop_grad, dst=dst_rank, group=group)
 
     def send_meta(self, tensor, group):
@@ -148,7 +148,7 @@ class SendRecvMeta:
             # send tensor type
             paddle.distributed.send(tensor_type, dst=dst_rank, group=group)
 
-            nums = paddle.to_tensor(len(tensor))
+            nums = paddle.to_tensor([len(tensor)])
             paddle.distributed.send(nums, dst=dst_rank, group=group)
 
             for d in tensor:
diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py
index fa5d71dbfe2..f7720902946 100644
--- a/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py
+++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py
@@ -40,7 +40,7 @@ class SimpleNet(paddle.nn.Layer):
         self.step = 0
 
     def forward(self, x):
-        return paddle.to_tensor(0.0, dtype='float32')
+        return paddle.to_tensor([0.0], dtype='float32')
 
 
 def fake_sample_reader():
diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py
index ba7a6fb631a..b1261c3d17b 100644
--- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py
@@ -104,7 +104,7 @@ class TestDygraph(unittest.TestCase):
         a = np.random.rand(3, 3)
         a_t = np.transpose(a, [1, 0])
         x_data = np.matmul(a, a_t) + 1e-03
-        x = paddle.to_tensor(x_data)
+        x = paddle.to_tensor([x_data])
         out = paddle.cholesky(x, upper=False)
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_deg2rad.py b/python/paddle/fluid/tests/unittests/test_deg2rad.py
index 77dce311a93..0f038e86f25 100644
--- a/python/paddle/fluid/tests/unittests/test_deg2rad.py
+++ b/python/paddle/fluid/tests/unittests/test_deg2rad.py
@@ -74,7 +74,7 @@ class TestDeg2radAPI2(TestDeg2radAPI):
     def test_dygraph(self):
         paddle.disable_static()
 
-        x2 = paddle.to_tensor(180)
+        x2 = paddle.to_tensor([180])
         result2 = paddle.deg2rad(x2)
         np.testing.assert_allclose(np.pi, result2.numpy(), rtol=1e-05)
 
diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py
index e2ed84a57c5..4624897f016 100755
--- a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py
+++ b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py
@@ -111,7 +111,7 @@ class TestDeprecatedDocorator(unittest.TestCase):
     def test_tensor_gradient(self):
         paddle.__version__ = '2.1.0'
 
-        x = paddle.to_tensor(5.0, stop_gradient=False)
+        x = paddle.to_tensor([5.0], stop_gradient=False)
         y = paddle.pow(x, 4.0)
         y.backward()
 
diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
index becadd85c02..434c59b5b80 100644
--- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
@@ -553,7 +553,7 @@ class TestBF16(unittest.TestCase):
             B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
             B = B.cuda()
             C = paddle.einsum('i,i->', A, B)
-            D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
+            D = paddle.to_tensor([8.0]).astype(paddle.bfloat16)
             self.assertEqual(C.item(), D.item())
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py
index c10d7d2c5c4..91a569d34c6 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace.py
@@ -259,11 +259,11 @@ class TestDygraphInplaceReshape(TestDygraphInplace):
 
 class TestDygraphInplaceReshapeTensor(TestDygraphInplace):
     def non_inplace_api_processing(self, var):
-        shape = paddle.to_tensor(-1)
+        shape = paddle.to_tensor([-1])
         return paddle.reshape(var, shape)
 
     def inplace_api_processing(self, var):
-        shape = paddle.to_tensor(-1)
+        shape = paddle.to_tensor([-1])
         return paddle.reshape_(var, shape)
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py
index d04357e201c..7f58638e7ac 100644
--- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py
@@ -283,7 +283,7 @@ class LinearNetWithMultiStaticFunc(paddle.nn.Layer):
         super().__init__()
         self._linear_0 = Linear(in_size, out_size)
         self._linear_1 = Linear(in_size, out_size)
-        self._scale = paddle.to_tensor(9.9)
+        self._scale = paddle.to_tensor([9.9])
 
     @paddle.jit.to_static
     def forward(self, x):
@@ -1196,7 +1196,7 @@ class LayerSaved(paddle.nn.Layer):
         self._linear_1_0 = Linear(self.hidden, self.hidden)
         self._linear_1_1 = Linear(self.hidden, self.hidden)
         self._linear_2 = Linear(self.hidden, out_size)
-        self._scale = paddle.to_tensor(9.9)
+        self._scale = paddle.to_tensor([9.9])
 
     @paddle.jit.to_static
     def forward(self, x):
@@ -1319,7 +1319,7 @@ class LayerLoadFinetune(paddle.nn.Layer):
         self._linear_1_0 = Linear(out_size, in_size)
         self._linear_1_1 = Linear(out_size, in_size)
         self._linear_2 = Linear(out_size, out_size)
-        self._scale = paddle.to_tensor(9.9)
+        self._scale = paddle.to_tensor([9.9])
 
         # Load multiple times
         self._load_l1 = paddle.jit.load(load_path)
diff --git a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
index fa64c480ed9..9617938967c 100644
--- a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
@@ -208,64 +208,64 @@ class TestLbfgs(unittest.TestCase):
 
     def test_line_search(self):
         def func1(x, alpha, d):
-            return paddle.to_tensor(x + alpha * d), paddle.to_tensor(0.0)
+            return paddle.to_tensor(x + alpha * d), paddle.to_tensor([0.0])
 
         def func2(x, alpha, d):
-            return paddle.to_tensor(x + alpha * d), paddle.to_tensor(1.0)
+            return paddle.to_tensor(x + alpha * d), paddle.to_tensor([1.0])
 
         def func3(x, alpha, d):
-            return paddle.to_tensor(x + alpha * d), paddle.to_tensor(-1.0)
+            return paddle.to_tensor(x + alpha * d), paddle.to_tensor([-1.0])
 
         _strong_wolfe(
             func1,
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(0.001),
-            paddle.to_tensor(0.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(0.0),
-            paddle.to_tensor(0.0),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([0.001]),
+            paddle.to_tensor([0.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([0.0]),
+            paddle.to_tensor([0.0]),
             max_ls=0,
         )
 
         _strong_wolfe(
             func2,
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(-0.001),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([-0.001]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
             max_ls=1,
         )
 
         _strong_wolfe(
             func3,
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(-0.001),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([-0.001]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
             max_ls=1,
         )
 
         _cubic_interpolate(
-            paddle.to_tensor(2.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(0.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(2.0),
-            paddle.to_tensor(0.0),
+            paddle.to_tensor([2.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([0.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([2.0]),
+            paddle.to_tensor([0.0]),
             [0.1, 0.5],
         )
 
         _cubic_interpolate(
-            paddle.to_tensor(2.0),
-            paddle.to_tensor(0.0),
-            paddle.to_tensor(-3.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(1.0),
-            paddle.to_tensor(-0.1),
+            paddle.to_tensor([2.0]),
+            paddle.to_tensor([0.0]),
+            paddle.to_tensor([-3.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([1.0]),
+            paddle.to_tensor([-0.1]),
             [0.1, 0.5],
         )
 
diff --git a/python/paddle/fluid/tests/unittests/test_rad2deg.py b/python/paddle/fluid/tests/unittests/test_rad2deg.py
index 8629d7dcd37..710d77f0d9f 100644
--- a/python/paddle/fluid/tests/unittests/test_rad2deg.py
+++ b/python/paddle/fluid/tests/unittests/test_rad2deg.py
@@ -73,7 +73,7 @@ class TestRad2degAPI2(TestRad2degAPI):
     def test_dygraph(self):
         paddle.disable_static()
 
-        x2 = paddle.to_tensor(np.pi / 2)
+        x2 = paddle.to_tensor([np.pi / 2])
         result2 = paddle.rad2deg(x2)
         np.testing.assert_allclose(90, result2.numpy(), rtol=1e-05)
 
@@ -91,7 +91,7 @@ class TestRad2degAPI3(TestRad2degAPI):
     def test_dygraph(self):
         paddle.disable_static()
 
-        x2 = paddle.to_tensor(1)
+        x2 = paddle.to_tensor([1])
         result2 = paddle.rad2deg(x2)
         np.testing.assert_allclose(180 / np.pi, result2.numpy(), rtol=1e-05)
 
diff --git a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py
index a6b6975eed7..90bdd3c1949 100644
--- a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py
+++ b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py
@@ -88,7 +88,7 @@ class TestStateDictReturn(unittest.TestCase):
     def test_missing_keys_and_unexpected_keys(self):
         model1 = MyModel2()
         tmp_dict = {}
-        tmp_dict["unexpected_keys"] = paddle.to_tensor(1)
+        tmp_dict["unexpected_keys"] = paddle.to_tensor([1])
         missing_keys, unexpected_keys = model1.set_state_dict(tmp_dict)
         self.assertEqual(len(missing_keys), 2)
         self.assertEqual(missing_keys[0], "linear.weight")
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
index 02934d07a89..16b6d32ce40 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
@@ -553,7 +553,7 @@ class TestTensorRegisterBackwardHook(unittest.TestCase):
         global HOOK_INIT_VALUE
         global HOOK_IS_CALLED
         for device in self.devices:
-            x = paddle.to_tensor(5.0, stop_gradient=False)
+            x = paddle.to_tensor([5.0], stop_gradient=False)
             x._register_backward_hook(global_void_hook)
             for i in range(5):
                 y = paddle.pow(x, 4.0)
@@ -567,14 +567,14 @@ class TestTensorRegisterBackwardHook(unittest.TestCase):
             HOOK_IS_CALLED = False
 
     def test_register_backward_hook_for_interior_var(self):
-        x = paddle.to_tensor(5.0, stop_gradient=False)
+        x = paddle.to_tensor([5.0], stop_gradient=False)
         y = paddle.pow(x, 4.0)
 
         with self.assertRaises(ValueError):
             y._register_backward_hook(global_void_hook)
 
     def test_register_backward_hook_for_var_without_gradient(self):
-        x = paddle.to_tensor(5.0)
+        x = paddle.to_tensor([5.0])
         y = paddle.pow(x, 4.0)
 
         with self.assertRaises(ValueError):
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index b533dc2b6e5..9d489c77374 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -114,7 +114,7 @@ class TestVarBase(unittest.TestCase):
                 )
                 np.testing.assert_array_equal(x.numpy(), [1.0])
                 self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
-                self.assertEqual(x.shape, [1])
+                self.assertEqual(x.shape, [])
                 self.assertEqual(x.stop_gradient, False)
                 self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)
 
@@ -407,7 +407,7 @@ class TestVarBase(unittest.TestCase):
 
     def test_detach(self):
         with fluid.dygraph.guard():
-            x = paddle.to_tensor(1.0, dtype="float64", stop_gradient=False)
+            x = paddle.to_tensor([1.0], dtype="float64", stop_gradient=False)
             detach_x = x.detach()
             self.assertTrue(detach_x.stop_gradient, True)
 
diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
index d18f94e78da..7ea98f7c889 100644
--- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
@@ -2385,6 +2385,20 @@ class TestSundryAPI(unittest.TestCase):
         self.assertEqual(x.grad.shape, [])
         np.testing.assert_allclose(x.grad, np.array(1.0))
 
+    def test_to_tensor(self):
+        out1 = paddle.to_tensor(1)
+        out2 = paddle.to_tensor(2.5)
+
+        out1.retain_grads()
+        out1.backward()
+        out2.retain_grads()
+        out2.backward()
+
+        self.assertEqual(out1.shape, [])
+        self.assertEqual(out1, 1)
+        self.assertEqual(out2.shape, [])
+        self.assertEqual(out2, 2.5)
+
     def test_linalg_slogdet(self):
         # 2-D input
         x = paddle.randn([3, 3])
@@ -4355,6 +4369,19 @@ class TestSundryAPIStatic(unittest.TestCase):
         self.assertEqual(out1.shape, (2, 3))
         self.assertEqual(out2.shape, (2, 3))
 
+    @prog_scope()
+    def test_to_tensor(self):
+        out1 = paddle.to_tensor(1)
+        out2 = paddle.to_tensor(2.5)
+
+        prog = paddle.static.default_main_program()
+        res = self.exe.run(prog, fetch_list=[out1, out2])
+
+        self.assertEqual(res[0].shape, ())
+        self.assertEqual(res[0], 1)
+        self.assertEqual(res[1].shape, ())
+        self.assertEqual(res[1], 2.5)
+
     @prog_scope()
     def test_linalg_slogdet(self):
         # 2-D input
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index d9b22ac045f..186eda03e74 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -555,7 +555,7 @@ def _to_tensor_non_static(data, dtype=None, place=None, stop_gradient=True):
             return data
 
         if np.isscalar(data) and not isinstance(data, str):
-            data = np.array([data])
+            data = np.array(data)
         elif isinstance(data, (list, tuple)):
             data = np.array(data)
             if data.dtype == np.object_:
@@ -649,7 +649,7 @@ def _to_tensor_static(data, dtype=None, stop_gradient=None):
 
         if not isinstance(data, np.ndarray):
             if np.isscalar(data) and not isinstance(data, str):
-                data = np.array([data])
+                data = np.array(data)
             elif isinstance(data, (list, tuple)):
                 data = np.array(data)
 
@@ -677,12 +677,6 @@ def _to_tensor_static(data, dtype=None, stop_gradient=None):
             and len(data.shape) > 0
             and any(isinstance(x, Variable) for x in data)
         ):
-            if not all(
-                [x.shape == (1,) for x in data if isinstance(x, Variable)]
-            ):
-                raise TypeError(
-                    "Unsupport paddle.to_tensor([Variable, Variable...]) with non-scalar variable."
-                )
             to_stack_list = [None] * data.shape[0]
             for idx, d in enumerate(data):
                 to_stack_list[idx] = _to_tensor_static(d, dtype, stop_gradient)
@@ -717,7 +711,7 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
                                 (0D-Tensor)
                     default_dtype
         Python Number ───────────────► paddle.Tensor
-                                        (1D-Tensor)
+                                        (0D-Tensor)
                     Keep dtype
         np.ndarray ───────────► paddle.Tensor
 
@@ -746,17 +740,17 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
         # <class 'paddle.Tensor'>
 
         paddle.to_tensor(1)
-        # Tensor(shape=[1], dtype=int64, place=CPUPlace, stop_gradient=True,
-        #        [1])
+        # Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=True,
+        #        1)
 
         x = paddle.to_tensor(1, stop_gradient=False)
         print(x)
-        # Tensor(shape=[1], dtype=int64, place=CPUPlace, stop_gradient=False,
-        #        [1])
+        # Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=False,
+        #        1)
 
         paddle.to_tensor(x)  # A new tensor will be created with default stop_gradient=True
-        # Tensor(shape=[1], dtype=int64, place=CPUPlace, stop_gradient=True,
-        #        [1])
+        # Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=True,
+        #        1)
 
         paddle.to_tensor([[0.1, 0.2], [0.3, 0.4]], place=paddle.CPUPlace(), stop_gradient=False)
         # Tensor(shape=[2, 2], dtype=float32, place=CPUPlace, stop_gradient=False,
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index efa8cbfc54e..6b07f57e33a 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -4421,8 +4421,8 @@ def rad2deg(x, name=None):
             x3 = paddle.to_tensor(1)
             result3 = paddle.rad2deg(x3)
             print(result3)
-            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
-            #         [57.29578018])
+            # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #        57.29578018)
     """
     rad2deg_scale = 180 / np.pi
     if in_dygraph_mode():
@@ -4485,8 +4485,8 @@ def deg2rad(x, name=None):
             x2 = paddle.to_tensor(180)
             result2 = paddle.deg2rad(x2)
             print(result2)
-            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
-            #         [3.14159274])
+            # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #        3.14159274)
     """
     deg2rad_scale = np.pi / 180.0
     if in_dygraph_mode():
@@ -4545,8 +4545,8 @@ def gcd(x, y, name=None):
             x1 = paddle.to_tensor(12)
             x2 = paddle.to_tensor(20)
             paddle.gcd(x1, x2)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [4])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        4)
 
             x3 = paddle.arange(6)
             paddle.gcd(x3, x2)
@@ -4555,17 +4555,17 @@ def gcd(x, y, name=None):
 
             x4 = paddle.to_tensor(0)
             paddle.gcd(x4, x2)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [20])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        20)
 
             paddle.gcd(x4, x4)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [0])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        0)
 
             x5 = paddle.to_tensor(-20)
             paddle.gcd(x1, x5)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [4])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        4)
     """
     shape = paddle.broadcast_shape(x.shape, y.shape)
     x = paddle.broadcast_to(x, shape)
@@ -4630,8 +4630,8 @@ def lcm(x, y, name=None):
             x1 = paddle.to_tensor(12)
             x2 = paddle.to_tensor(20)
             paddle.lcm(x1, x2)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [60])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        60)
 
             x3 = paddle.arange(6)
             paddle.lcm(x3, x2)
@@ -4640,17 +4640,17 @@ def lcm(x, y, name=None):
 
             x4 = paddle.to_tensor(0)
             paddle.lcm(x4, x2)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [0])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        0)
 
             paddle.lcm(x4, x4)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [0])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        0)
 
             x5 = paddle.to_tensor(-20)
             paddle.lcm(x1, x5)
-            # Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
-            #        [60])
+            # Tensor(shape=[], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
+            #        60)
     """
     d = paddle.gcd(x, y)
     # paddle.mod will raise an error when any element of y is 0. To avoid
diff --git a/test/auto_parallel/random_control_unittest.py b/test/auto_parallel/random_control_unittest.py
index 52e6e216074..f55b57b9f1b 100644
--- a/test/auto_parallel/random_control_unittest.py
+++ b/test/auto_parallel/random_control_unittest.py
@@ -82,7 +82,7 @@ class TestRandomControl(unittest.TestCase):
     ):
 
         for np_mask in [mask_np_list[i] for i in comapre_idx]:
-            mask_tensor_local = paddle.to_tensor(np_mask.astype("float32"))
+            mask_tensor_local = paddle.to_tensor([np_mask.astype("float32")])
             if rank == 0:
                 mask_tensor_remote = paddle.ones_like(mask_tensor_local)
                 dy_broadcast_helper(mask_tensor_remote)
diff --git a/test/distribution/test_distribution_bernoulli.py b/test/distribution/test_distribution_bernoulli.py
index 2229880b7a6..490bd9aa54d 100644
--- a/test/distribution/test_distribution_bernoulli.py
+++ b/test/distribution/test_distribution_bernoulli.py
@@ -184,10 +184,10 @@ class BernoulliTest(unittest.TestCase):
         ('probs_00', 0.0, 'float64', 'float32'),
         ('probs_03', 0.3, 'float64', 'float32'),
         ('probs_10', 1.0, 'float64', 'float32'),
-        ('probs_tensor_03_32', paddle.to_tensor(0.3), 'float32', 'float32'),
+        ('probs_tensor_03_32', paddle.to_tensor([0.3]), 'float32', 'float32'),
         (
             'probs_tensor_03_64',
-            paddle.to_tensor(0.3, dtype='float64'),
+            paddle.to_tensor([0.3], dtype='float64'),
             'float64',
             'float64',
         ),
@@ -257,11 +257,11 @@ class BernoulliTestFeature(BernoulliTest):
             ),
             (
                 paddle.to_tensor(
-                    0.0,
+                    [0.0],
                 ),
             ),
-            (paddle.to_tensor(1.0),),
-            (paddle.to_tensor(0.0, dtype='float64'),),
+            (paddle.to_tensor([1.0]),),
+            (paddle.to_tensor([0.0], dtype='float64'),),
         ]
     )
     def test_log_prob(self, value):
@@ -291,9 +291,9 @@ class BernoulliTestFeature(BernoulliTest):
                     ]
                 ),
             ),
-            (paddle.to_tensor(0.0),),
-            (paddle.to_tensor(1.0),),
-            (paddle.to_tensor(0.0, dtype='float64'),),
+            (paddle.to_tensor([0.0]),),
+            (paddle.to_tensor([1.0]),),
+            (paddle.to_tensor([0.0], dtype='float64'),),
         ]
     )
     def test_prob(self, value):
@@ -323,11 +323,11 @@ class BernoulliTestFeature(BernoulliTest):
                     ]
                 ),
             ),
-            (paddle.to_tensor(0.0),),
-            (paddle.to_tensor(0.3),),
-            (paddle.to_tensor(0.7),),
-            (paddle.to_tensor(1.0),),
-            (paddle.to_tensor(0.0, dtype='float64'),),
+            (paddle.to_tensor([0.0]),),
+            (paddle.to_tensor([0.3]),),
+            (paddle.to_tensor([0.7]),),
+            (paddle.to_tensor([1.0]),),
+            (paddle.to_tensor([0.0], dtype='float64'),),
         ]
     )
     def test_cdf(self, value):
@@ -359,7 +359,7 @@ class BernoulliTestFeature(BernoulliTest):
 
     def test_kl_divergence(self):
         with paddle.fluid.dygraph.guard(self.place):
-            other_probs = paddle.to_tensor(0.9, dtype=self.dtype)
+            other_probs = paddle.to_tensor([0.9], dtype=self.dtype)
 
             rv_paddle_other = Bernoulli(other_probs)
             rv_np_other = BernoulliNumpy(other_probs)
@@ -422,7 +422,7 @@ class BernoulliTestFeature(BernoulliTest):
         # 1-D probs
         (
             'probs_1d_1d_32',
-            paddle.to_tensor(0.3),
+            paddle.to_tensor([0.3]),
             'float32',
             'float32',
             [
@@ -432,7 +432,7 @@ class BernoulliTestFeature(BernoulliTest):
         ),
         (
             'probs_1d_1d_64',
-            paddle.to_tensor(0.3, dtype='float64'),
+            paddle.to_tensor([0.3], dtype='float64'),
             'float64',
             'float64',
             paddle.to_tensor(
@@ -444,7 +444,7 @@ class BernoulliTestFeature(BernoulliTest):
         ),
         (
             'probs_1d_2d',
-            paddle.to_tensor(0.3),
+            paddle.to_tensor([0.3]),
             'float32',
             'float32',
             [100, 2],
@@ -452,7 +452,7 @@ class BernoulliTestFeature(BernoulliTest):
         ),
         (
             'probs_1d_3d',
-            paddle.to_tensor(0.3),
+            paddle.to_tensor([0.3]),
             'float32',
             'float32',
             [100, 2, 3],
diff --git a/test/distribution/test_distribution_transform.py b/test/distribution/test_distribution_transform.py
index 4bc90d8f792..63ecd99d77b 100644
--- a/test/distribution/test_distribution_transform.py
+++ b/test/distribution/test_distribution_transform.py
@@ -523,7 +523,7 @@ class TestChainTransform(unittest.TestCase):
                 transform.ChainTransform(
                     (
                         transform.AffineTransform(
-                            paddle.to_tensor(0.0), paddle.to_tensor(1.0)
+                            paddle.to_tensor([0.0]), paddle.to_tensor([1.0])
                         ),
                         transform.ExpTransform(),
                     )
@@ -560,7 +560,7 @@ class TestChainTransform(unittest.TestCase):
                 transform.ChainTransform(
                     (
                         transform.AffineTransform(
-                            paddle.to_tensor(0.0), paddle.to_tensor(-1.0)
+                            paddle.to_tensor([0.0]), paddle.to_tensor([-1.0])
                         ),
                         transform.ExpTransform(),
                     )
@@ -595,9 +595,9 @@ class TestChainTransform(unittest.TestCase):
                 transform.ChainTransform(
                     (
                         transform.AffineTransform(
-                            paddle.to_tensor(0.0), paddle.to_tensor(-1.0)
+                            paddle.to_tensor([0.0]), paddle.to_tensor([-1.0])
                         ),
-                        transform.PowerTransform(paddle.to_tensor(2.0)),
+                        transform.PowerTransform(paddle.to_tensor([2.0])),
                     )
                 ),
                 np.array([1.0, 2.0, 3.0]),
@@ -619,7 +619,7 @@ class TestChainTransform(unittest.TestCase):
                 transform.ChainTransform(
                     (
                         transform.AffineTransform(
-                            paddle.to_tensor(0.0), paddle.to_tensor(-1.0)
+                            paddle.to_tensor([0.0]), paddle.to_tensor([-1.0])
                         ),
                         transform.ExpTransform(),
                     )
@@ -638,7 +638,7 @@ class TestChainTransform(unittest.TestCase):
                 transform.ChainTransform(
                     (
                         transform.AffineTransform(
-                            paddle.to_tensor(0.0), paddle.to_tensor(-1.0)
+                            paddle.to_tensor([0.0]), paddle.to_tensor([-1.0])
                         ),
                         transform.ExpTransform(),
                     )
@@ -743,7 +743,7 @@ class TestIndependentTransform(unittest.TestCase):
 @param.place(config.DEVICES)
 class TestPowerTransform(unittest.TestCase):
     def setUp(self):
-        self._t = transform.PowerTransform(paddle.to_tensor(2.0))
+        self._t = transform.PowerTransform(paddle.to_tensor([2.0]))
 
     def test_init(self):
         with self.assertRaises(TypeError):
diff --git a/test/distribution/test_distribution_transformed_distribution.py b/test/distribution/test_distribution_transformed_distribution.py
index 57264b5f897..09b5bce4ecc 100644
--- a/test/distribution/test_distribution_transformed_distribution.py
+++ b/test/distribution/test_distribution_transformed_distribution.py
@@ -41,7 +41,7 @@ class TestIndependent(unittest.TestCase):
         return np.sum(value, tuple(range(-n, 0))) if n > 0 else value
 
     def test_log_prob(self):
-        value = paddle.to_tensor(0.5)
+        value = paddle.to_tensor([0.5])
         np.testing.assert_allclose(
             self.simple_log_prob(value, self.base, self.transforms),
             self._t.log_prob(value),
diff --git a/test/distribution/test_kl.py b/test/distribution/test_kl.py
index e4b5c51fa75..d75f17208a8 100644
--- a/test/distribution/test_kl.py
+++ b/test/distribution/test_kl.py
@@ -139,8 +139,8 @@ class TestDispatch(unittest.TestCase):
         ),
         (
             'test-same-dist',
-            mock.Exponential(paddle.to_tensor(1.0)),
-            mock.Exponential(paddle.to_tensor(1.0)),
+            mock.Exponential(paddle.to_tensor([1.0])),
+            mock.Exponential(paddle.to_tensor([1.0])),
         ),
     ],
 )
diff --git a/test/dygraph_to_static/test_cpu_cuda_to_tensor.py b/test/dygraph_to_static/test_cpu_cuda_to_tensor.py
index 24f0bd84556..f8d15971a7b 100644
--- a/test/dygraph_to_static/test_cpu_cuda_to_tensor.py
+++ b/test/dygraph_to_static/test_cpu_cuda_to_tensor.py
@@ -35,8 +35,8 @@ class TestCpuCuda(unittest.TestCase):
 class TestToTensor(unittest.TestCase):
     def test_to_tensor_with_variable_list(self):
         def func(x):
-            ones = paddle.to_tensor([1])
-            twos = paddle.to_tensor([2])
+            ones = paddle.to_tensor(1)
+            twos = paddle.to_tensor(2)
             x = paddle.to_tensor([ones, twos, 3, 4])
             return x
 
diff --git a/test/dygraph_to_static/test_fallback.py b/test/dygraph_to_static/test_fallback.py
index 6da8602e604..e4dc0114054 100644
--- a/test/dygraph_to_static/test_fallback.py
+++ b/test/dygraph_to_static/test_fallback.py
@@ -52,7 +52,7 @@ class UnsuppportNet(paddle.nn.Layer):
 
 class TestFallback(unittest.TestCase):
     def setUp(self):
-        self.x = paddle.to_tensor(2).astype('int')
+        self.x = paddle.to_tensor([2]).astype('int')
 
     def tearDown(self):
         pass
diff --git a/test/dygraph_to_static/test_to_tensor.py b/test/dygraph_to_static/test_to_tensor.py
index b0131263c4e..05cd5ec78f2 100644
--- a/test/dygraph_to_static/test_to_tensor.py
+++ b/test/dygraph_to_static/test_to_tensor.py
@@ -84,6 +84,12 @@ def case6(x):
     return a
 
 
+def case7(x):
+    a = paddle.to_tensor(10.0)
+
+    return a
+
+
 class TestToTensorReturnVal(unittest.TestCase):
     def test_to_tensor_badreturn(self):
         paddle.disable_static()
@@ -131,6 +137,12 @@ class TestToTensorReturnVal(unittest.TestCase):
         self.assertTrue(a.stop_gradient == b.stop_gradient)
         self.assertTrue(a.place._equals(b.place))
 
+        a = paddle.jit.to_static(case7)(x)
+        b = case7(x)
+        self.assertTrue(a.dtype == b.dtype)
+        self.assertTrue(a.stop_gradient == b.stop_gradient)
+        self.assertTrue(a.place._equals(b.place))
+
 
 class TestStatic(unittest.TestCase):
     def test_static(self):
diff --git a/test/legacy_test/auto_parallel_gpt_model.py b/test/legacy_test/auto_parallel_gpt_model.py
index 5aef64e412a..7cf48cf1c47 100644
--- a/test/legacy_test/auto_parallel_gpt_model.py
+++ b/test/legacy_test/auto_parallel_gpt_model.py
@@ -233,7 +233,7 @@ class MultiHeadAttention(nn.Layer):
         product = paddle.matmul(x=q, y=k, transpose_y=True)
         product = paddle.multiply(
             product,
-            paddle.to_tensor(self.head_dim**-0.5, dtype=product.dtype),
+            paddle.to_tensor([self.head_dim**-0.5], dtype=product.dtype),
         )
         if attn_mask is not None:
             product = product + attn_mask
diff --git a/test/legacy_test/test_audio_functions.py b/test/legacy_test/test_audio_functions.py
index 8400bd4ecb4..47adbdd4905 100644
--- a/test/legacy_test/test_audio_functions.py
+++ b/test/legacy_test/test_audio_functions.py
@@ -60,7 +60,7 @@ class TestAudioFuncitons(unittest.TestCase):
     def test_audio_function(self, val: float, htk_flag: bool):
         mel_paddle = paddle.audio.functional.hz_to_mel(val, htk_flag)
         mel_paddle_tensor = paddle.audio.functional.hz_to_mel(
-            paddle.to_tensor(val), htk_flag
+            paddle.to_tensor([val]), htk_flag
         )
         mel_librosa = librosa.hz_to_mel(val, htk_flag)
         np.testing.assert_almost_equal(mel_paddle, mel_librosa, decimal=5)
@@ -70,7 +70,7 @@ class TestAudioFuncitons(unittest.TestCase):
 
         hz_paddle = paddle.audio.functional.mel_to_hz(val, htk_flag)
         hz_paddle_tensor = paddle.audio.functional.mel_to_hz(
-            paddle.to_tensor(val), htk_flag
+            paddle.to_tensor([val]), htk_flag
         )
         hz_librosa = librosa.mel_to_hz(val, htk_flag)
         np.testing.assert_almost_equal(hz_paddle, hz_librosa, decimal=4)
@@ -79,7 +79,7 @@ class TestAudioFuncitons(unittest.TestCase):
         )
 
         decibel_paddle = paddle.audio.functional.power_to_db(
-            paddle.to_tensor(val)
+            paddle.to_tensor([val])
         )
         decibel_librosa = librosa.power_to_db(val)
         np.testing.assert_almost_equal(
diff --git a/test/quantization/imperative_test_utils.py b/test/quantization/imperative_test_utils.py
index 9e9c2cca447..36e93109154 100644
--- a/test/quantization/imperative_test_utils.py
+++ b/test/quantization/imperative_test_utils.py
@@ -165,7 +165,7 @@ class ImperativeLenet(paddle.nn.Layer):
         x = self.features(x)
 
         x = paddle.flatten(x, 1)
-        x = self.add(x, paddle.to_tensor(0.0))  # For CI
+        x = self.add(x, paddle.to_tensor([0.0]))  # For CI
         x = self.fc(x)
         return x
 
-- 
GitLab