From 767e7b3f0b7a972f21da987d2405b3ff685afb23 Mon Sep 17 00:00:00 2001 From: JYChen Date: Tue, 9 May 2023 10:40:37 +0800 Subject: [PATCH] [Cherry-pick] zero-dim: support 0-D for getitem/setitem (#53441) * support 0-D output and 0-D as indice in __getitem__ * fix tests * fix inference and UT * add unittest for setitem * fix xpu test * fix xpu 0-d * fix right value is 0d and index is List/Tensor * Hack__getitem__ from 0-d to 1-d with FLAGS_set_to_1d * change PHI_DECLARE_xxx to DECLARE_xxx since the change not merged to 2.5 * hack 1-D tensor to Scalar * throw warning at __getitem__, not slice_utils --- paddle/fluid/framework/attribute_checker.h | 23 +- paddle/fluid/pybind/eager_method.cc | 72 ++--- paddle/fluid/pybind/imperative.cc | 86 +++--- paddle/phi/infermeta/unary.cc | 3 - paddle/phi/kernels/funcs/slice_utils.h | 13 +- .../phi/kernels/xpu/set_value_grad_kernel.cc | 5 + .../auto_parallel/operators/dist_slice.py | 20 +- python/paddle/fft.py | 4 +- .../unittests/test_imperative_numpy_bridge.py | 2 +- .../fluid/tests/unittests/test_kthvalue_op.py | 8 +- .../tests/unittests/test_set_value_op.py | 2 +- .../fluid/tests/unittests/test_slice_op.py | 4 +- .../fluid/tests/unittests/test_var_base.py | 11 +- .../fluid/tests/unittests/test_variable.py | 3 +- .../fluid/tests/unittests/test_while_op.py | 6 +- .../tests/unittests/test_zero_dim_tensor.py | 246 ++++++++++++++++++ python/paddle/fluid/variable_index.py | 37 +-- .../incubate/optimizer/functional/lbfgs.py | 8 +- .../jit/dy2static/variable_trans_func.py | 6 +- python/paddle/nn/layer/rnn.py | 2 +- python/paddle/tensor/manipulation.py | 18 +- test/dygraph_to_static/test_list.py | 2 +- test/xpu/test_set_value_op_xpu.py | 2 +- test/xpu/test_slice_op_xpu.py | 4 +- test/xpu/test_zero_dim_tensor_xpu.py | 134 ++++++++++ 25 files changed, 573 insertions(+), 148 deletions(-) diff --git a/paddle/fluid/framework/attribute_checker.h b/paddle/fluid/framework/attribute_checker.h index 67eb69efdf3..2e5e7bf8939 100644 --- a/paddle/fluid/framework/attribute_checker.h +++ b/paddle/fluid/framework/attribute_checker.h @@ -73,10 +73,10 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); @@ -105,20 +105,21 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); - PADDLE_ENFORCE_EQ(shape[0] == 1U || shape[0] == -1, - true, - platform::errors::InvalidArgument( - "Required shape[0] of Attribute(%s) == 1 or -1, " - "but received shape[0] == %s", - var_desc->Name(), - shape[0])); + PADDLE_ENFORCE_EQ( + shape.size() == 0U || shape[0] == 1U || shape[0] == -1, + true, + platform::errors::InvalidArgument( + "Required shape is (), or shape[0] of Attribute(%s) == 1 or -1, " + "but received shape[0] == %s", + var_desc->Name(), + shape[0])); } } }; diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc 
index 2455eed34fe..0092bccba70 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -135,17 +135,18 @@ static PyObject* tensor_method_numpy(TensorObject* self, } } if (set_to_1d) { - // 0D Tensor hack process to 1D numpy, will remove in future + // 0D Tensor hack process to 1D numpy, will remove in release 2.6 VLOG(0) << "Warning:: 0D Tensor cannot be used as 'Tensor.numpy()[0]' . In " "order to avoid this problem, " "0D Tensor will be changed to 1D numpy currently, but it's not " "correct and will be " - "removed in future. For Tensor contain only one element, Please " + "removed in release 2.6. For Tensor contain only one element, " + "Please " "modify " " 'Tensor.numpy()[0]' to 'float(Tensor)' as soon as " "possible, " - "otherwise 'Tensor.numpy()[0]' will raise error in future."; + "otherwise 'Tensor.numpy()[0]' will raise error in release 2.6."; py_rank = 1; py_dims[0] = 1; py_strides[0] = sizeof_dtype * numel; @@ -922,39 +923,50 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, } } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + // with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + // otherwise the output shape will be not correct. if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) + << "Warning: In Tensor '__getitem__', if the number of scalar " + "elements " + "in the index is equal to the rank of the Tensor, the output " + "should " + "be 0-D. In order to be consistent with the behavior of previous " + "versions, it will be processed to 1-D. But it is not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). "; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - paddle::Tensor new_out; - { - eager_gil_scoped_release guard; - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto& axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + paddle::Tensor new_out; + { + eager_gil_scoped_release guard; + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto& axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - new_out = unsqueeze_ad_func(out, none_axes); + axis -= len; } - return ToPyObject(new_out); + new_out = unsqueeze_ad_func(out, none_axes); } + return ToPyObject(new_out); } // the index is a list diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 1be8371ad4f..1440918eb48 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -63,6 +63,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/type_defs.h" +DECLARE_bool(set_to_1d); namespace paddle { namespace pybind { @@ -1067,46 +1068,63 @@ void BindImperative(py::module *m_ptr) { } tracer->TraceOp(op_type, ins, outs, std::move(attrs)); } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output + // will be 1-D with FLAGS_set_to_1d=True. In this case, one + // `None` should be pop out, otherwise the output shape will be + // not correct. if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) << "Warning: In Tensor '__getitem__', if the number " + "of scalar " + "elements " + "in the index is equal to the rank of the Tensor, " + "the output " + "should " + "be 0-D. In order to be consistent with the " + "behavior of previous " + "versions, it will be processed to 1-D. But it is " + "not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index " + "element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). "; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto &axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto &axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - - imperative::NameVarBaseMap ins = {{"X", {out}}}; - framework::AttributeMap attrs = {{"axes", none_axes}}; - auto new_out = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - auto out_xshape = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - imperative::NameVarBaseMap outs = {{"Out", {new_out}}, - {"XShape", {out_xshape}}}; - tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); - - return new_out; + axis -= len; } + + imperative::NameVarBaseMap ins = {{"X", {out}}}; + framework::AttributeMap attrs = {{"axes", none_axes}}; + auto new_out = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto out_xshape = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + imperative::NameVarBaseMap outs = {{"Out", {new_out}}, + {"XShape", {out_xshape}}}; + tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); + + return new_out; } // the index is a list diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 14ae02246ba..dbb19380907 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3918,9 +3918,6 @@ void StridedSliceRawInferMeta(const MetaTensor& x, new_out_shape.push_back(out_dims[i]); } } - if (new_out_shape.size() == 0) { - new_out_shape.push_back(1); - } out_dims = 
phi::make_ddim(new_out_shape); } VLOG(4) << "out_dims: " << out_dims; diff --git a/paddle/phi/kernels/funcs/slice_utils.h b/paddle/phi/kernels/funcs/slice_utils.h index a56a5e16f65..78fa4c4ba13 100644 --- a/paddle/phi/kernels/funcs/slice_utils.h +++ b/paddle/phi/kernels/funcs/slice_utils.h @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include - #include #include +#include "paddle/phi/core/flags.h" + +DECLARE_bool(set_to_1d); namespace phi { @@ -202,13 +205,11 @@ inline DDim GetDecreasedDims(const DDim slice_dims, new_shape.push_back(decreased_dims[i]); } } - - // NOTE(liym27): Paddle does not support that the rank of Tensor is 0, and - // uses [1] instead. - if (new_shape.size() == 0) { + if (FLAGS_set_to_1d && new_shape.size() == 0) { + // NOTE(zoooo0820): Hack procssing to 1-D, when axes decrease to 0-D in + // slice. This will remove in release 2.6. new_shape.push_back(1); } - decreased_dims = phi::make_ddim(new_shape); } return decreased_dims; diff --git a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc index d7e1ed8114e..d80a2a97da8 100644 --- a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc @@ -266,6 +266,11 @@ void SetValueGradImpl(const Context& dev_ctx, {fake_value_grad_dims.Get(), fake_value_grad_dims.size()}, static_cast(0)); auto value_grad_dims_vec = phi::vectorize(value_grad_dims); + // for value is a 0-D Tensor + if (value_grad_dims.size() == 0) { + value_grad_dims_vec = + phi::vectorize(phi::make_ddim(std::vector({1}))); + } for (auto offset : offsets) { for (int i = 0; i < out_dims_size; i++) { slice_end[i] = offset[i] + fake_value_grad_dims[i]; diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index 17e68002fa4..0110f54d481 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle + from ..utils import compute_compatible_dim_mapping, is_dim_shard from .common import ( DistributedOperatorImpl, @@ -70,9 +72,14 @@ class DistributedSliceImpl(DistributedOperatorImpl): if i not in decrease_axis: ref_indices.append(i) if ref_indices == []: - assert len(out_dims_mapping) == 1 - if is_dim_shard(out_dims_mapping[0]): - return False + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + assert len(out_dims_mapping) == 1 + if is_dim_shard(out_dims_mapping[0]): + return False + else: + assert len(out_dims_mapping) == 0 else: for i in range(len(out_dims_mapping)): ref_index = ref_indices[i] @@ -142,9 +149,12 @@ class DistributedSliceImpl(DistributedOperatorImpl): ref_indices.append(i) if ref_dims_mapping == []: - ref_dims_mapping = [-1] + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. 
+ if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + ref_dims_mapping = [-1] + assert ref_dims_mapping[0] == out_dims_mapping[0] assert len(ref_dims_mapping) == len(out_dims_mapping) - assert ref_dims_mapping[0] == out_dims_mapping[0] changed = False else: assert len(ref_dims_mapping) == len(out_dims_mapping) diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 1ce18f120c1..438c65ae2f0 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -1371,7 +1371,7 @@ def fftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = shape[axes] // 2 else: - shifts = paddle.concat([shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) @@ -1416,7 +1416,7 @@ def ifftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = -shape[axes] // 2 else: - shifts = paddle.concat([-shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([-shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py index effcfece0f5..58059a29553 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py @@ -43,7 +43,7 @@ class TestImperativeNumpyBridge(unittest.TestCase): np.testing.assert_array_equal(var2.numpy(), data_np) data_np[0][0] = -1 self.assertEqual(data_np[0][0], -1) - self.assertNotEqual(var2[0][0].numpy()[0], -1) + self.assertNotEqual(var2[0][0].numpy(), -1) self.assertFalse(np.array_equal(var2.numpy(), data_np)) diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py index 66389a870e4..0bf3d8e9480 100644 --- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py +++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py @@ -140,16 +140,16 @@ class TestKthvalueOpWithNaN(unittest.TestCase): nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) def test_nan_in_gpu_kernel(): paddle.set_device('gpu') nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) test_nan_in_cpu_kernel() if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 9c5a71df018..66d4b8f05b7 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -1590,7 +1590,7 @@ class TestSetValueInplace(unittest.TestCase): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 0314a37170d..f43bd4b140d 100644 --- 
a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -541,8 +541,8 @@ class TestSliceAPI(unittest.TestCase): def test_1(self): with paddle_static_guard(): input = np.random.random([3, 4, 5, 6]).astype("float64") - minus_1 = paddle.tensor.fill_constant([1], "int32", -1) - minus_3 = paddle.tensor.fill_constant([1], "int64", -3) + minus_1 = paddle.tensor.fill_constant([], "int32", -1) + minus_3 = paddle.tensor.fill_constant([], "int64", -3) starts = paddle.static.data( name='starts', shape=[1, 3], dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 24920eb375c..c9607f89197 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -604,8 +604,7 @@ class TestVarBase(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), tuple(nw.shape)) @@ -705,10 +704,10 @@ class TestVarBase(unittest.TestCase): var = paddle.to_tensor(tensor_array) - one = paddle.ones(shape=[1], dtype="int32") - two = paddle.full(shape=[1], fill_value=2, dtype="int32") - negative_one = paddle.full(shape=[1], fill_value=-1, dtype="int32") - four = paddle.full(shape=[1], fill_value=4, dtype="int32") + one = paddle.ones(shape=[], dtype="int32") + two = paddle.full(shape=[], fill_value=2, dtype="int32") + negative_one = paddle.full(shape=[], fill_value=-1, dtype="int32") + four = paddle.full(shape=[], fill_value=4, dtype="int32") var = fluid.dygraph.to_variable(tensor_array) var1 = var[0, one, one] diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index b709510371e..6d5bd96f9ac 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -132,8 +132,7 @@ class TestVariable(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), nw.shape) diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index ea6d2d49453..8ae9fa8c5c2 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -192,9 +192,9 @@ class TestOutputsMustExistsInputs(unittest.TestCase): with fluid.program_guard(main_program, startup_program): def func(x): - s = paddle.zeros([1]) - i = paddle.ones([1]) - max_len = paddle.shape(x)[0] + s = paddle.zeros([]) + i = paddle.ones([]) + max_len = paddle.shape(x) def cond(i, s, x): return i < max_len diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index f3bea4cf246..8d824a68298 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -666,6 +666,140 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(zero_dim_var.shape, []) self.assertEqual(zero_dim_var.item(), 0.5) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + 
out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32') @@ -2456,6 +2590,118 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[0].shape, ()) self.assertEqual(res[0], 0.5) + @prog_scope() + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) + + self.assertEqual(res[0].shape, ()) + np.testing.assert_allclose(res[0], np.array(119)) + self.assertEqual(res[2].shape, ()) + np.testing.assert_allclose(res[2], 1.0) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. 
+ x2 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x2[1, 2] + out2 = x2[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + res = self.exe.run(prog, fetch_list=[out1, out2]) + np.testing.assert_allclose(res[0], res[1]) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x3 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out3 = x3[1, 2, None, 3, 4] + out4 = x3[1, None, 2, None, 3, 4] + res = self.exe.run(prog, fetch_list=[out3, out4]) + self.assertEqual(res[0].shape, (1,)) + np.testing.assert_allclose(res[0], np.array([119])) + self.assertEqual(res[1].shape, (1, 1)) + np.testing.assert_allclose(res[1], np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x4 = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out5 = x4[indice] + out6 = x4[indice, indice] + res = self.exe.run(prog, fetch_list=[out5, out6]) + + self.assertEqual(res[0].shape, (1, 3, 4)) + np.testing.assert_allclose(res[0], np.ones((1, 3, 4))) + self.assertEqual(res[1].shape, (1, 4)) + np.testing.assert_allclose(res[1], np.ones((1, 4))) + + @prog_scope() + def test_setitem(self): + # NOTE(zoooo0820): __setitem__ has gradient problem in static graph. + # To solve this, we may not support __setitem__ in static graph. + # These unit tests will delete soon. + + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 2, 3, 4], np.array(10)) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, v.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + @prog_scope() def test_expand(self): x = paddle.full([], 1, 'float32') diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 0d866860b31..451e8c3bf98 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -17,6 +17,7 @@ import numpy as np from . import unique_name from . import core import paddle +import warnings MAX_INTEGER = 2**31 - 1 @@ -185,7 +186,8 @@ class SliceInfo: for i in range(len(gather_tensor_shape)): if not ( - value_dims_bd[i] == gather_tensor_shape[i] + len(value_dims_bd) == 0 + or value_dims_bd[i] == gather_tensor_shape[i] or value_dims_bd[i] == 1 ): raise ValueError( @@ -282,7 +284,16 @@ def is_integer_or_scalar_tensor(ele): if isinstance(ele, int): return True elif isinstance(ele, Variable): - if len(ele.shape) == 1 and ele.shape[0] == 1: + # NOTE(zoooo0820): For compatibility, if FLAGS_set_to_1d is set to True, + # 1-D tensor is still treated as a scalar, which means basic indexing. + # This will be removed in future. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + if len(ele.shape) == 1 and ele.shape[0] == 1: + warnings.warn( + "1-D Tensor will be treat as advanced indexing in future version. Currently, 1-D Tensor means a scalar, not vector, and please modify it to 0-D Tensor. If advanced indexing is needed, please use `export FLAGS_set_to_1d=False` to set the flag." + ) + return True + if len(ele.shape) == 0: return True return False @@ -573,13 +584,14 @@ def _getitem_impl_(var, item): out = reverse(out, axis=reverse_axes) - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - if len(decrease_axes) == len(var.shape): + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + # otherwise the output shape will be not correct. 
+ set_to_1d = paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d'] + if set_to_1d and len(decrease_axes) == len(var.shape): + warnings.warn( + "Warning: In Tensor '__getitem__', if the number of scalar elements in the index is equal to the rank of the Tensor, the output should be 0-D. In order to be consistent with the behavior of previous versions, it will be processed to 1-D. But it is not correct and will be removed in release 2.6. If 1-D is still wanted, please modify the index element from scalar to slice (e.g. 'x[i]' => 'x[i:i+1]')." + ) none_axes = none_axes[1:] if len(none_axes) > 0: @@ -592,13 +604,6 @@ def _getitem_impl_(var, item): new_axis = axis - l none_axes[idx] = new_axis - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - from ..tensor import unsqueeze out = unsqueeze(out, axis=none_axes) diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py index a7221f0925e..e3620c4ffc0 100644 --- a/python/paddle/incubate/optimizer/functional/lbfgs.py +++ b/python/paddle/incubate/optimizer/functional/lbfgs.py @@ -125,9 +125,7 @@ def minimize_lbfgs( is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool') num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64') - history_size = paddle.full( - shape=[1], fill_value=history_size, dtype='int64' - ) + history_size = paddle.full(shape=[], fill_value=history_size, dtype='int64') head = paddle.full(shape=[1], fill_value=1, dtype='int64') tail = paddle.full(shape=[1], fill_value=0, dtype='int64') @@ -177,7 +175,7 @@ def minimize_lbfgs( q = paddle.assign(g1) # In a array circle, the index may out of range, so must use mod. i = paddle.full( - shape=[1], fill_value=(head - 1).mod(history_size), dtype='int64' + shape=[], fill_value=(head - 1).mod(history_size), dtype='int64' ) def cond(i, q): @@ -193,7 +191,7 @@ def minimize_lbfgs( r = paddle.matmul(H0, q) - i = paddle.full(shape=[1], fill_value=tail + 1, dtype='int64') + i = paddle.full(shape=[], fill_value=tail + 1, dtype='int64') def cond(i, r): return i != head diff --git a/python/paddle/jit/dy2static/variable_trans_func.py b/python/paddle/jit/dy2static/variable_trans_func.py index 20f0fb6317e..80c4487dc29 100644 --- a/python/paddle/jit/dy2static/variable_trans_func.py +++ b/python/paddle/jit/dy2static/variable_trans_func.py @@ -51,11 +51,11 @@ def to_static_variable(x): Translate a Python Tensor to PaddlePaddle static graph Tensor ''' if isinstance(x, bool): - return paddle.full(shape=[1], dtype='bool', fill_value=x) + return paddle.full(shape=[], dtype='bool', fill_value=x) if isinstance(x, float): - return paddle.full(shape=[1], dtype='float64', fill_value=x) + return paddle.full(shape=[], dtype='float64', fill_value=x) if isinstance(x, int): - return paddle.full(shape=[1], dtype='int64', fill_value=x) + return paddle.full(shape=[], dtype='int64', fill_value=x) if isinstance(x, UndefinedVar) or x is None: """ for early return case, we need a variable to represent None, current we use data_layer_not_check. 
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index ffd27a545b9..cc8ab648b88 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -271,7 +271,7 @@ def _rnn_static_graph( mask = paddle.reverse(mask, axis=[0]) if sequence_length else None with paddle.fluid.framework.device_guard("cpu"): - start_i = paddle.zeros([1], dtype="int64") + start_i = paddle.zeros([], dtype="int64") end = max_seq_len end = paddle.cast(end, "int64") diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index c2b4ec71263..2b42d795baf 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -3169,19 +3169,19 @@ def tile(x, repeat_times, name=None): ) if isinstance(repeat_times, Variable): assert ( - len(repeat_times.shape) == 1 - ), 'repeat_times must be an 1-D Tensor.' + repeat_times.numel() == 1 + ), 'repeat_times must be a Tensor with one element.' else: for elem in repeat_times: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in repeat_times must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + ), 'Elements in repeat_times must be Tensor with one element or integers.' check_variable_and_dtype( x, @@ -3425,18 +3425,18 @@ def expand(x, shape, name=None): return _C_ops.expand(x, shape) else: if isinstance(shape, Variable): - assert len(shape.shape) == 1, 'shape must be an 1-D Tensor.' + assert shape.numel() == 1, 'shape must be a Tensor with one element' else: for elem in shape: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in shape must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in shape must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in shape must be 1-D Tensors or integers.' + ), 'Elements in shape must be Tensor with one element or integers.' 
check_variable_and_dtype( x, diff --git a/test/dygraph_to_static/test_list.py b/test/dygraph_to_static/test_list.py index 44e02950bc5..091d261ed74 100644 --- a/test/dygraph_to_static/test_list.py +++ b/test/dygraph_to_static/test_list.py @@ -364,7 +364,7 @@ class TestListWithCondGradInferVarType(unittest.TestCase): x = paddle.to_tensor([2, 3, 4], dtype='float32') index = paddle.to_tensor([1]) res = net(x, index) - self.assertEqual(res[0], 48.0) + self.assertEqual(res, 48.0) if __name__ == '__main__': diff --git a/test/xpu/test_set_value_op_xpu.py b/test/xpu/test_set_value_op_xpu.py index e749eb8bc1b..a373d6a0ba5 100644 --- a/test/xpu/test_set_value_op_xpu.py +++ b/test/xpu/test_set_value_op_xpu.py @@ -1432,7 +1432,7 @@ class XPUTestSetValueOp(XPUOpTestWrapper): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/test/xpu/test_slice_op_xpu.py b/test/xpu/test_slice_op_xpu.py index f19c3d37e28..7cc0550740e 100644 --- a/test/xpu/test_slice_op_xpu.py +++ b/test/xpu/test_slice_op_xpu.py @@ -166,7 +166,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.starts = [0, 1, 2, 3] self.ends = [1, 2, 3, 4] self.axes = [0, 1, 2, 3] - self.decrease_axis = [0, 1, 2, 3] + self.decrease_axis = [0, 1, 2] self.infer_flags = [1, 1, 1] self.out = self.input[0, 1, 2, 3:4] @@ -188,7 +188,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.axes = [0, 1, 2, 3] self.decrease_axis = [0, 1, 2, 3] self.infer_flags = [1, 1, 1] - self.out = self.input[0, 1, 2, 3:4] + self.out = self.input[0, 1, 2, 3] support_types = get_xpu_op_support_types('slice') diff --git a/test/xpu/test_zero_dim_tensor_xpu.py b/test/xpu/test_zero_dim_tensor_xpu.py index 1a9f59040d5..9ecce0af830 100644 --- a/test/xpu/test_zero_dim_tensor_xpu.py +++ b/test/xpu/test_zero_dim_tensor_xpu.py @@ -344,6 +344,140 @@ class TestSundryAPI(unittest.TestCase): paddle.disable_static() self.x = paddle.rand([]) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. 
+ x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32') -- GitLab
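
For reference, below is a minimal dygraph sketch of the indexing behavior exercised by the unit tests added in this patch (test_zero_dim_tensor.py / test_zero_dim_tensor_xpu.py). It assumes a Paddle build that includes this change; the FLAGS_set_to_1d call is only needed if the transitional 1-D fallback is enabled by default in your build, and the flag's default value may differ between releases.

    import numpy as np
    import paddle

    # Disable the transitional 1-D fallback so __getitem__ follows the new 0-D rules.
    paddle.set_flags({'FLAGS_set_to_1d': False})

    x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))

    # Scalar indices on every axis now produce a 0-D Tensor (previously hacked to shape [1]).
    out = x[1, 2, 3, 4]
    assert out.shape == []
    np.testing.assert_allclose(out.numpy(), np.array(119))

    # A 0-D integer Tensor index behaves the same as a Python int index.
    i = paddle.full([], 1, dtype='int32')
    j = paddle.full([], 2, dtype='int32')
    np.testing.assert_allclose(x[i, j].numpy(), x[1, 2].numpy())

    # A 0-D Tensor index is also accepted in __setitem__.
    y = paddle.zeros((2, 3, 4, 5))
    y[i, j] = 0.5
    np.testing.assert_allclose(y[1, 2].numpy(), np.full((4, 5), 0.5, dtype='float32'))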