Unverified commit cdefcd00, authored by zhouweiwei2014, committed by GitHub

[Zero-Dim] Support output 0D for argmin/argmax/median/kthvalue/mode/equal_all/allclose (#51889)

* [Zero-Dim] Support output 0D for argmin/argmax/median/kthvalue/mode/equal_all/allclose

* fix CI
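
For reference, a minimal usage sketch of the behavior this change enables (output shapes are taken from the tests added below; the snippet itself is illustrative and not part of the commit):

    import paddle

    x = paddle.rand([3, 5])

    # Full reductions now produce 0D tensors instead of shape [1].
    idx = paddle.argmax(x)           # argmin behaves the same way
    med = paddle.median(x)
    print(idx.shape, med.shape)      # [] []

    # Comparison ops that reduce to a single bool are 0D as well.
    print(paddle.equal_all(x, x).shape)   # []
    print(paddle.allclose(x, x).shape)    # []

    # kthvalue/mode on a 0D input return 0D value and index tensors.
    x0 = paddle.randn([])
    val, index = paddle.kthvalue(x0, 1)
    print(val.shape, index.shape)    # [] []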
Parent c31ffbe8
...@@ -85,14 +85,7 @@ void AllValueCompareInferMeta(const MetaTensor& x, ...@@ -85,14 +85,7 @@ void AllValueCompareInferMeta(const MetaTensor& x,
MetaTensor* out, MetaTensor* out,
MetaConfig config) { MetaConfig config) {
detail::BinarySameInputDimsCheck(x, y, config); detail::BinarySameInputDimsCheck(x, y, config);
out->set_dims(phi::make_ddim({}));
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() == 0 && y_dims.size() == 0) {
out->set_dims(phi::make_ddim({}));
} else {
out->set_dims(phi::make_ddim({1}));
}
out->set_dtype(DataType::BOOL); out->set_dtype(DataType::BOOL);
} }
...@@ -403,12 +396,7 @@ void CompareAllInferMeta(const MetaTensor& x, ...@@ -403,12 +396,7 @@ void CompareAllInferMeta(const MetaTensor& x,
errors::InvalidArgument( errors::InvalidArgument(
"The size of dim_y should not be greater than dim_x's.")); "The size of dim_y should not be greater than dim_x's."));
out->share_lod(x); out->share_lod(x);
if (!x.dims().size() || !y.dims().size()) { out->set_dims(make_ddim({}));
out->set_dims(make_ddim({}));
} else {
out->set_dims(make_ddim({1}));
}
out->set_dtype(DataType::BOOL);
} }
void ComplexInferMeta(const MetaTensor& x, void ComplexInferMeta(const MetaTensor& x,
......
...@@ -148,7 +148,11 @@ void ArgMinMaxInferMeta(const MetaTensor& x, ...@@ -148,7 +148,11 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
if (!config.is_runtime && axis.FromTensor()) { if (!config.is_runtime && axis.FromTensor()) {
std::vector<int64_t> vec; std::vector<int64_t> vec;
if (flatten) { if (flatten) {
vec = {1}; if (keepdims) {
vec = std::vector<int64_t>(x.dims().size(), -1);
} else {
vec = {};
}
} else { } else {
if (keepdims) { if (keepdims) {
vec = std::vector<int64_t>(x.dims().size(), -1); vec = std::vector<int64_t>(x.dims().size(), -1);
...@@ -169,7 +173,6 @@ void ArgMinMaxInferMeta(const MetaTensor& x, ...@@ -169,7 +173,6 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
const auto& x_dims = x.dims(); const auto& x_dims = x.dims();
auto x_rank = x.dims().size(); auto x_rank = x.dims().size();
auto zero_dim_tensor = x_rank == 0;
if (x_rank > 0) { if (x_rank > 0) {
PADDLE_ENFORCE_GE(int_axis, PADDLE_ENFORCE_GE(int_axis,
-x_rank, -x_rank,
...@@ -200,7 +203,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x, ...@@ -200,7 +203,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
if (config.is_runtime) { if (config.is_runtime) {
if (dtype == phi::TransToProtoVarType(DataType::INT32)) { if (dtype == phi::TransToProtoVarType(DataType::INT32)) {
int64_t all_element_num = 0; int64_t all_element_num = 0;
if (flatten || zero_dim_tensor) { if (flatten) {
all_element_num = phi::product(x_dims); all_element_num = phi::product(x_dims);
} else { } else {
all_element_num = x_dims[int_axis]; all_element_num = x_dims[int_axis];
...@@ -218,11 +221,12 @@ void ArgMinMaxInferMeta(const MetaTensor& x, ...@@ -218,11 +221,12 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
} }
std::vector<int64_t> vec; std::vector<int64_t> vec;
if (flatten) {
if (x_rank == 0) { if (keepdims) {
// vec is set to empty vec = std::vector<int64_t>(x.dims().size(), 1);
} else if (flatten) { } else {
vec.emplace_back(static_cast<int64_t>(1)); vec = {};
}
} else { } else {
for (int64_t i = 0; i < int_axis; i++) vec.emplace_back(x_dims[i]); for (int64_t i = 0; i < int_axis; i++) vec.emplace_back(x_dims[i]);
if (keepdims) { if (keepdims) {
...@@ -1838,14 +1842,14 @@ void KthvalueInferMeta(const MetaTensor& x, ...@@ -1838,14 +1842,14 @@ void KthvalueInferMeta(const MetaTensor& x,
MetaConfig config) { MetaConfig config) {
auto input_dims = x.dims(); auto input_dims = x.dims();
const int& dim_size = input_dims.size(); const int& dim_size = input_dims.size();
PADDLE_ENFORCE_LE(axis,
dim_size,
phi::errors::InvalidArgument(
"the axis must be [-%d, %d), but received %d .",
dim_size,
dim_size,
axis));
if (dim_size > 0) { if (dim_size > 0) {
PADDLE_ENFORCE_LT(axis,
dim_size,
phi::errors::InvalidArgument(
"the axis must be [-%d, %d), but received %d .",
dim_size,
dim_size,
axis));
PADDLE_ENFORCE_GE(axis, PADDLE_ENFORCE_GE(axis,
-dim_size, -dim_size,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
...@@ -1853,6 +1857,14 @@ void KthvalueInferMeta(const MetaTensor& x, ...@@ -1853,6 +1857,14 @@ void KthvalueInferMeta(const MetaTensor& x,
dim_size, dim_size,
dim_size, dim_size,
axis)); axis));
} else if (dim_size == 0) {
// 0-dim tensor
PADDLE_ENFORCE_EQ(axis == 0 || axis == -1,
true,
phi::errors::InvalidArgument(
"'axis'(%d) must be 0 or -1 if input tensor is "
"0-dim.",
axis));
} }
if (axis < 0) axis += dim_size; if (axis < 0) axis += dim_size;
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
......
...@@ -32,26 +32,34 @@ template <typename Context, ...@@ -32,26 +32,34 @@ template <typename Context,
ArgMinMaxType argMinMaxValue> ArgMinMaxType argMinMaxValue>
struct ArgMinMaxFunctor {}; struct ArgMinMaxFunctor {};
#define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \ #define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \
template <typename Context, typename T, typename Tout, int64_t Rank> \ template <typename Context, typename T, typename Tout, int64_t Rank> \
struct ArgMinMaxFunctor<Context, T, Tout, Rank, enum_argminmax_value> { \ struct ArgMinMaxFunctor<Context, T, Tout, Rank, enum_argminmax_value> { \
void operator()(const Context& dev_ctx, \ void operator()(const Context& dev_ctx, \
const DenseTensor& in, \ const DenseTensor& in, \
DenseTensor* out, \ DenseTensor* out, \
phi::DDim x_dims, \ phi::DDim x_dims, \
int64_t axis, \ phi::DDim out_dims, \
bool keepdims) { \ int64_t axis, \
auto in_eigen = EigenTensor<T, Rank>::From(in, x_dims); \ bool keepdims, \
if (keepdims) { \ bool flatten) { \
auto out_eigen = EigenTensor<Tout, Rank>::From(*out); \ auto in_eigen = EigenTensor<T, Rank>::From(in, x_dims); \
out_eigen.device(*(dev_ctx.eigen_device())) = \ if (flatten) { \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \ auto out_eigen = EigenTensor<Tout, 0>::From(*out, out_dims); \
} else { \ out_eigen.device(*(dev_ctx.eigen_device())) = \
auto out_eigen = EigenTensor<Tout, Rank - 1>::From(*out); \ in_eigen.eigen_op_type(axis).template cast<Tout>(); \
out_eigen.device(*(dev_ctx.eigen_device())) = \ } else { \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \ if (keepdims) { \
} \ auto out_eigen = EigenTensor<Tout, Rank>::From(*out, out_dims); \
} \ out_eigen.device(*(dev_ctx.eigen_device())) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} else { \
auto out_eigen = EigenTensor<Tout, Rank - 1>::From(*out, out_dims); \
out_eigen.device(*(dev_ctx.eigen_device())) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} \
} \
} \
} }
DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin); DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin);
...@@ -81,32 +89,30 @@ struct VisitDataArgMinMaxFunctor { ...@@ -81,32 +89,30 @@ struct VisitDataArgMinMaxFunctor {
template <typename Tout> template <typename Tout>
void apply() const { void apply() const {
dev_ctx.template Alloc<Tout>(out); dev_ctx.template Alloc<Tout>(out);
bool new_keepdims = keepdims;
if (flatten) new_keepdims = true;
// if flatten, will construct the new dims for the cacluate // if flatten, will construct the new dims for the cacluate
phi::DDim x_dims; phi::DDim x_dims;
phi::DDim out_dims;
int new_axis = axis; int new_axis = axis;
if (flatten) { if (flatten) {
// always reduce 1D -> 0D
x_dims = phi::make_ddim({x.numel()}); x_dims = phi::make_ddim({x.numel()});
// if flatten, the axis just as 0 out_dims = phi::make_ddim({});
new_axis = 0; new_axis = 0;
} else { } else {
x_dims = x.dims(); x_dims = x.dims();
out_dims = out->dims();
if (axis < 0) new_axis = axis + x_dims.size(); if (axis < 0) new_axis = axis + x_dims.size();
} }
// For 0D Tensor
if (x.dims().size() == 0) {
phi::funcs::set_constant(dev_ctx, out, 0);
return;
}
#define CALL_ARG_MINMAX_FUNCTOR(rank) \ #define CALL_ARG_MINMAX_FUNCTOR(rank) \
ArgMinMaxFunctor<Context, T, Tout, rank, EnumArgMinMaxValue> functor##rank; \ ArgMinMaxFunctor<Context, T, Tout, rank, EnumArgMinMaxValue> functor##rank; \
functor##rank(dev_ctx, x, out, x_dims, new_axis, new_keepdims) functor##rank(dev_ctx, x, out, x_dims, out_dims, new_axis, keepdims, flatten)
switch (x_dims.size()) { switch (x_dims.size()) {
case 0:
phi::funcs::set_constant(dev_ctx, out, 0);
return;
case 1: case 1:
CALL_ARG_MINMAX_FUNCTOR(1); CALL_ARG_MINMAX_FUNCTOR(1);
break; break;
...@@ -195,9 +201,7 @@ PD_REGISTER_KERNEL(argmin, ...@@ -195,9 +201,7 @@ PD_REGISTER_KERNEL(argmin,
int32_t, int32_t,
int64_t, int64_t,
int16_t, int16_t,
uint8_t) { uint8_t) {}
kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
}
PD_REGISTER_KERNEL(argmax, PD_REGISTER_KERNEL(argmax,
CPU, CPU,
...@@ -208,6 +212,4 @@ PD_REGISTER_KERNEL(argmax, ...@@ -208,6 +212,4 @@ PD_REGISTER_KERNEL(argmax,
int32_t, int32_t,
int64_t, int64_t,
int16_t, int16_t,
uint8_t) { uint8_t) {}
kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
}
...@@ -95,8 +95,10 @@ class LearningRateDecay: ...@@ -95,8 +95,10 @@ class LearningRateDecay:
if isinstance(value, Variable): if isinstance(value, Variable):
assert ( assert (
value.size == 1 value.size == 1
), "size of Variable in state_dict must be 1" ), "the size of Variable in state_dict must be 1, but its size is {} with shape {}".format(
value = float(value) value.size, value.shape
)
value = value.item()
state_dict[key] = value state_dict[key] = value
return state_dict return state_dict
......
...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer): ...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer):
def forward(self, x): def forward(self, x):
is_use = ( is_use = (
paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).numpy()[0] paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).item()
and self.trainer_id == 1 and self.trainer_id == 1
) )
......
...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer): ...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer):
def forward(self, x): def forward(self, x):
is_use = ( is_use = (
paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).numpy()[0] paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).item()
and self.trainer_id == 1 and self.trainer_id == 1
) )
......
...@@ -19,6 +19,8 @@ import numpy as np ...@@ -19,6 +19,8 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
paddle.enable_static()
class TestAllcloseLayer(unittest.TestCase): class TestAllcloseLayer(unittest.TestCase):
def allclose_check(self, use_cuda, dtype='float32'): def allclose_check(self, use_cuda, dtype='float32'):
...@@ -44,31 +46,31 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -44,31 +46,31 @@ class TestAllcloseLayer(unittest.TestCase):
result_v, result_nan_v = exe.run( result_v, result_nan_v = exe.run(
feed={'a': x, 'b': y}, fetch_list=[result, result_nan] feed={'a': x, 'b': y}, fetch_list=[result, result_nan]
) )
self.assertEqual(result_v[0], False) self.assertEqual(result_v, False)
self.assertEqual(result_nan_v[0], False) self.assertEqual(result_nan_v, False)
x = np.array([10000.0, 1e-08]).astype(dtype) x = np.array([10000.0, 1e-08]).astype(dtype)
y = np.array([10000.1, 1e-09]).astype(dtype) y = np.array([10000.1, 1e-09]).astype(dtype)
result_v, result_nan_v = exe.run( result_v, result_nan_v = exe.run(
feed={'a': x, 'b': y}, fetch_list=[result, result_nan] feed={'a': x, 'b': y}, fetch_list=[result, result_nan]
) )
self.assertEqual(result_v[0], True) self.assertEqual(result_v, True)
self.assertEqual(result_nan_v[0], True) self.assertEqual(result_nan_v, True)
x = np.array([1.0, float('nan')]).astype(dtype) x = np.array([1.0, float('nan')]).astype(dtype)
y = np.array([1.0, float('nan')]).astype(dtype) y = np.array([1.0, float('nan')]).astype(dtype)
result_v, result_nan_v = exe.run( result_v, result_nan_v = exe.run(
feed={'a': x, 'b': y}, fetch_list=[result, result_nan] feed={'a': x, 'b': y}, fetch_list=[result, result_nan]
) )
self.assertEqual(result_v[0], False) self.assertEqual(result_v, False)
self.assertEqual(result_nan_v[0], True) self.assertEqual(result_nan_v, True)
# for corner case # for corner case
x = np.array([10.1, 10.1]).astype(dtype) x = np.array([10.1, 10.1]).astype(dtype)
y = np.array([10, 10]).astype(dtype) y = np.array([10, 10]).astype(dtype)
(result_c,) = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner]) (result_c,) = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner])
corner_res = dtype == 'float64' corner_res = dtype == 'float64'
self.assertEqual(result_c[0], corner_res) self.assertEqual(result_c, corner_res)
def test_allclose_cpu_fp32(self): def test_allclose_cpu_fp32(self):
main = fluid.Program() main = fluid.Program()
...@@ -123,7 +125,7 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -123,7 +125,7 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=False, equal_nan=False,
name='test_1', name='test_1',
) )
self.assertEqual(ret_1.numpy()[0], False) self.assertEqual(ret_1.numpy(), False)
ret_1 = paddle.allclose( ret_1 = paddle.allclose(
x_v_1, x_v_1,
y_v_1, y_v_1,
...@@ -132,7 +134,7 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -132,7 +134,7 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=True, equal_nan=True,
name='test_2', name='test_2',
) )
self.assertEqual(ret_1.numpy()[0], False) self.assertEqual(ret_1.numpy(), False)
x_v_2 = paddle.to_tensor(x_2) x_v_2 = paddle.to_tensor(x_2)
y_v_2 = paddle.to_tensor(y_2) y_v_2 = paddle.to_tensor(y_2)
ret_2 = paddle.allclose( ret_2 = paddle.allclose(
...@@ -143,7 +145,7 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -143,7 +145,7 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=False, equal_nan=False,
name='test_3', name='test_3',
) )
self.assertEqual(ret_2.numpy()[0], True) self.assertEqual(ret_2.numpy(), True)
ret_2 = paddle.allclose( ret_2 = paddle.allclose(
x_v_2, x_v_2,
y_v_2, y_v_2,
...@@ -152,7 +154,7 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -152,7 +154,7 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=True, equal_nan=True,
name='test_4', name='test_4',
) )
self.assertEqual(ret_2.numpy()[0], True) self.assertEqual(ret_2.numpy(), True)
x_v_3 = paddle.to_tensor(x_3) x_v_3 = paddle.to_tensor(x_3)
y_v_3 = paddle.to_tensor(y_3) y_v_3 = paddle.to_tensor(y_3)
ret_3 = paddle.allclose( ret_3 = paddle.allclose(
...@@ -163,7 +165,7 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -163,7 +165,7 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=False, equal_nan=False,
name='test_5', name='test_5',
) )
self.assertEqual(ret_3.numpy()[0], False) self.assertEqual(ret_3.numpy(), False)
ret_3 = paddle.allclose( ret_3 = paddle.allclose(
x_v_3, x_v_3,
y_v_3, y_v_3,
...@@ -172,20 +174,20 @@ class TestAllcloseLayer(unittest.TestCase): ...@@ -172,20 +174,20 @@ class TestAllcloseLayer(unittest.TestCase):
equal_nan=True, equal_nan=True,
name='test_6', name='test_6',
) )
self.assertEqual(ret_3.numpy()[0], True) self.assertEqual(ret_3.numpy(), True)
# for corner case # for corner case
x_v_4 = paddle.to_tensor(x_4) x_v_4 = paddle.to_tensor(x_4)
y_v_4 = paddle.to_tensor(y_4) y_v_4 = paddle.to_tensor(y_4)
ret_4 = paddle.allclose( ret_4 = paddle.allclose(
x_v_4, y_v_4, rtol=0.01, atol=0.0, name='test_7' x_v_4, y_v_4, rtol=0.01, atol=0.0, name='test_7'
) )
self.assertEqual(ret_4.numpy()[0], False) self.assertEqual(ret_4.numpy(), False)
x_v_5 = paddle.to_tensor(x_5) x_v_5 = paddle.to_tensor(x_5)
y_v_5 = paddle.to_tensor(y_5) y_v_5 = paddle.to_tensor(y_5)
ret_5 = paddle.allclose( ret_5 = paddle.allclose(
x_v_5, y_v_5, rtol=0.015, atol=0.0, name='test_8' x_v_5, y_v_5, rtol=0.015, atol=0.0, name='test_8'
) )
self.assertEqual(ret_5.numpy()[0], True) self.assertEqual(ret_5.numpy(), True)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -32,8 +32,6 @@ class TestMedian(unittest.TestCase): ...@@ -32,8 +32,6 @@ class TestMedian(unittest.TestCase):
paddle.enable_static() paddle.enable_static()
x, axis, keepdims = lis_test x, axis, keepdims = lis_test
res_np = np.median(x, axis=axis, keepdims=keepdims) res_np = np.median(x, axis=axis, keepdims=keepdims)
if not isinstance(res_np, np.ndarray):
res_np = np.array([res_np])
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
exe = paddle.static.Executor() exe = paddle.static.Executor()
...@@ -47,10 +45,8 @@ class TestMedian(unittest.TestCase): ...@@ -47,10 +45,8 @@ class TestMedian(unittest.TestCase):
def dygraph_single_test_median(self, lis_test): def dygraph_single_test_median(self, lis_test):
x, axis, keepdims = lis_test x, axis, keepdims = lis_test
res_np = np.median(x, axis=axis, keepdims=keepdims) res_np = np.median(x, axis=axis, keepdims=keepdims)
if not isinstance(res_np, np.ndarray):
res_np = np.array([res_np])
res_pd = paddle.median(paddle.to_tensor(x), axis, keepdims) res_pd = paddle.median(paddle.to_tensor(x), axis, keepdims)
self.check_numpy_res(res_pd.numpy(), res_np) self.check_numpy_res(res_pd.numpy(False), res_np)
def test_median_static(self): def test_median_static(self):
h = 3 h = 3
......
...@@ -784,35 +784,78 @@ class TestSundryAPI(unittest.TestCase): ...@@ -784,35 +784,78 @@ class TestSundryAPI(unittest.TestCase):
out = paddle.broadcast_shape(x, y) out = paddle.broadcast_shape(x, y)
self.assertEqual(out, []) self.assertEqual(out, [])
self.assertEqual(out, [])
def test_argmin(self): def test_argmin(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
out1 = paddle.argmin(x, 0) out1 = paddle.argmin(x, 0)
out2 = paddle.argmin(x, -1) out2 = paddle.argmin(x, -1)
out3 = paddle.argmin(x, None) out3 = paddle.argmin(x, None)
self.assertEqual(out1.shape, []) self.assertEqual(out1.shape, [])
np.testing.assert_allclose(out1, 0.0) np.testing.assert_allclose(out1, 0)
self.assertEqual(out2.shape, []) self.assertEqual(out2.shape, [])
np.testing.assert_allclose(out2, 0.0) np.testing.assert_allclose(out2, 0)
self.assertEqual(out3.shape, []) self.assertEqual(out3.shape, [])
np.testing.assert_allclose(out3, 0.0) np.testing.assert_allclose(out3, 0)
# 2) x is 1D
x = paddle.rand([5])
x.stop_gradient = False
out = paddle.argmin(x, 0)
out.backward()
self.assertEqual(out.shape, [])
# 3) x is ND
x = paddle.rand([3, 5])
x.stop_gradient = False
out = paddle.argmin(x)
out.backward()
self.assertEqual(out.shape, [])
# 4) x is ND, keepdim=True
x = paddle.rand([3, 5])
x.stop_gradient = False
out = paddle.argmin(x, keepdim=True)
out.backward()
self.assertEqual(out.shape, [1, 1])
def test_argmax(self): def test_argmax(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
out1 = paddle.argmax(x, 0) out1 = paddle.argmax(x, 0)
out2 = paddle.argmax(x, -1) out2 = paddle.argmax(x, -1)
out3 = paddle.argmax(x, None) out3 = paddle.argmax(x, None)
self.assertEqual(out1.shape, []) self.assertEqual(out1.shape, [])
np.testing.assert_allclose(out1, 0.0) np.testing.assert_allclose(out1, 0)
self.assertEqual(out2.shape, []) self.assertEqual(out2.shape, [])
np.testing.assert_allclose(out2, 0.0) np.testing.assert_allclose(out2, 0)
self.assertEqual(out3.shape, []) self.assertEqual(out3.shape, [])
np.testing.assert_allclose(out3, 0.0) np.testing.assert_allclose(out3, 0)
# 2) x is 1D
x = paddle.rand([5])
out = paddle.argmax(x, 0)
self.assertEqual(out.shape, [])
# 3) x is ND
x = paddle.rand([3, 5])
out = paddle.argmax(x)
self.assertEqual(out.shape, [])
# 4) x is ND, keepdim=True
x = paddle.rand([3, 5])
out = paddle.argmax(x, keepdim=True)
self.assertEqual(out.shape, [1, 1])
def test_median(self): def test_median(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out1 = paddle.median(x, 0) out1 = paddle.median(x, 0)
...@@ -835,6 +878,81 @@ class TestSundryAPI(unittest.TestCase): ...@@ -835,6 +878,81 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
np.testing.assert_allclose(x.grad, 3.0) np.testing.assert_allclose(x.grad, 3.0)
# 2) x is 1D
x = paddle.rand([5])
x.stop_gradient = False
out = paddle.median(x, 0)
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(x.grad.shape, [5])
# 3) x is ND
x = paddle.rand([3, 5])
x.stop_gradient = False
out = paddle.median(x, None)
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(x.grad.shape, [3, 5])
# 4) x is ND, keepdim=True
x = paddle.rand([3, 5])
x.stop_gradient = False
out = paddle.median(x, keepdim=True)
out.backward()
self.assertEqual(out.shape, [1, 1])
self.assertEqual(x.grad.shape, [3, 5])
def test_kthvalue(self):
# 1) x is 0D
x = paddle.randn([])
x.stop_gradient = False
out, index = paddle.kthvalue(x, 1)
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out, x)
self.assertEqual(index.shape, [])
self.assertEqual(index, 0)
self.assertEqual(x.grad.shape, [])
self.assertEqual(x.grad, 1.0)
# 2) x is 1D
x1 = paddle.randn([5])
x1.stop_gradient = False
out1, index1 = paddle.kthvalue(x1, 1)
out1.backward()
self.assertEqual(out1.shape, [])
self.assertEqual(index1.shape, [])
self.assertEqual(x1.grad.shape, [5])
def test_mode(self):
# 1) x is 0D
x = paddle.randn([])
x.stop_gradient = False
out, index = paddle.mode(x)
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out, x)
self.assertEqual(index.shape, [])
self.assertEqual(index, 0)
self.assertEqual(x.grad.shape, [])
self.assertEqual(x.grad, 1.0)
# 2) x is 1D
x1 = paddle.randn([5])
x1.stop_gradient = False
out1, index1 = paddle.mode(x1)
out1.backward()
self.assertEqual(out1.shape, [])
self.assertEqual(index1.shape, [])
self.assertEqual(x1.grad.shape, [5])
def test_std(self): def test_std(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
...@@ -1279,48 +1397,6 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1279,48 +1397,6 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(out.grad.shape, [5]) self.assertEqual(out.grad.shape, [5])
self.assertEqual(updates.grad.shape, []) self.assertEqual(updates.grad.shape, [])
def test_kthvalue(self):
places = ['cpu']
if paddle.is_compiled_with_cuda():
places.append('gpu')
for place in places:
paddle.set_device(place)
x = paddle.randn(())
x.stop_gradient = False
out = paddle.kthvalue(x, 1)
out[0].backward()
# check shape of output value and indice
self.assertEqual(out[0].shape, [])
self.assertEqual(out[1].shape, [])
# check grad shape and value
self.assertEqual(x.grad.shape, [])
self.assertTrue(x.grad.numpy() == 1.0)
def test_mode(self):
places = ['cpu']
if paddle.is_compiled_with_cuda():
places.append('gpu')
for place in places:
paddle.set_device(place)
x = paddle.randn(())
x.stop_gradient = False
out = paddle.mode(x)
out[0].backward()
# check shape of output value and indice
self.assertEqual(out[0].shape, [])
self.assertEqual(out[1].shape, [])
# check grad shape and value
self.assertEqual(x.grad.shape, [])
self.assertTrue(x.grad.numpy() == 1)
def test_flatten(self): def test_flatten(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
...@@ -1712,17 +1788,35 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1712,17 +1788,35 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(logit.grad.shape, [2, 3]) self.assertEqual(logit.grad.shape, [2, 3])
def test_allclose(self): def test_allclose(self):
# 1) x is 0D
x = paddle.full([], 0.5) x = paddle.full([], 0.5)
y = paddle.full([], 0.6) y = paddle.full([], 0.6)
self.assertFalse(paddle.allclose(x, y)) out = paddle.allclose(x, y)
self.assertEqual(out.shape, [])
self.assertFalse(out)
def test_equalall(self): # 2) x is ND
x = paddle.full([2, 3], 0.5)
y = paddle.full([2, 3], 0.6)
out = paddle.allclose(x, y)
self.assertEqual(out.shape, [])
self.assertFalse(out)
def test_equal_all(self):
# 1) x is 0D
x = paddle.full([], 0.5) x = paddle.full([], 0.5)
y = paddle.full([], 0.6) y = paddle.full([], 0.6)
out = paddle.equal_all(x, y) out = paddle.equal_all(x, y)
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
self.assertFalse(out) self.assertFalse(out)
# 2) x is ND
x = paddle.full([2, 3], 0.5)
y = paddle.full([2, 3], 0.6)
out = paddle.equal_all(x, y)
self.assertEqual(out.shape, [])
self.assertFalse(out)
def test_where(self): def test_where(self):
x1 = paddle.full([], 1) x1 = paddle.full([], 1)
x2 = paddle.full([], 2) x2 = paddle.full([], 2)
...@@ -2155,11 +2249,16 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2155,11 +2249,16 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(res[3], 1.0) self.assertEqual(res[3], 1.0)
def test_argmin(self): def test_argmin(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
out1 = paddle.argmin(x, 0) out1 = paddle.argmin(x, 0)
out2 = paddle.argmin(x, -1) out2 = paddle.argmin(x, -1)
out3 = paddle.argmin(x, None) out3 = paddle.argmin(x, None)
# 2) x is ND
x4 = paddle.rand([3, 5])
out4 = paddle.argmin(x, None)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
prog, prog,
...@@ -2167,6 +2266,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2167,6 +2266,7 @@ class TestSundryAPIStatic(unittest.TestCase):
out1, out1,
out2, out2,
out3, out3,
out4,
], ],
) )
self.assertEqual(res[0].shape, ()) self.assertEqual(res[0].shape, ())
...@@ -2175,14 +2275,20 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2175,14 +2275,20 @@ class TestSundryAPIStatic(unittest.TestCase):
np.testing.assert_allclose(res[1], 0.0) np.testing.assert_allclose(res[1], 0.0)
self.assertEqual(res[2].shape, ()) self.assertEqual(res[2].shape, ())
np.testing.assert_allclose(res[2], 0.0) np.testing.assert_allclose(res[2], 0.0)
self.assertEqual(res[3].shape, ())
@prog_scope() @prog_scope()
def test_argmax(self): def test_argmax(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
out1 = paddle.argmax(x, 0) out1 = paddle.argmax(x, 0)
out2 = paddle.argmax(x, -1) out2 = paddle.argmax(x, -1)
out3 = paddle.argmax(x, None) out3 = paddle.argmax(x, None)
# 2) x is ND
x4 = paddle.rand([3, 5])
out4 = paddle.argmax(x, None)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
prog, prog,
...@@ -2190,6 +2296,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2190,6 +2296,7 @@ class TestSundryAPIStatic(unittest.TestCase):
out1, out1,
out2, out2,
out3, out3,
out4,
], ],
) )
self.assertEqual(res[0].shape, ()) self.assertEqual(res[0].shape, ())
...@@ -2198,14 +2305,22 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2198,14 +2305,22 @@ class TestSundryAPIStatic(unittest.TestCase):
np.testing.assert_allclose(res[1], 0.0) np.testing.assert_allclose(res[1], 0.0)
self.assertEqual(res[2].shape, ()) self.assertEqual(res[2].shape, ())
np.testing.assert_allclose(res[2], 0.0) np.testing.assert_allclose(res[2], 0.0)
self.assertEqual(res[3].shape, ())
@prog_scope() @prog_scope()
def test_median(self): def test_median(self):
# 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.median(x) out = paddle.median(x)
paddle.static.append_backward(out)
# 2) x is ND
x1 = paddle.rand([3, 5])
x1.stop_gradient = False
out1 = paddle.median(x1)
paddle.static.append_backward(out1)
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
prog, prog,
...@@ -2213,6 +2328,8 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2213,6 +2328,8 @@ class TestSundryAPIStatic(unittest.TestCase):
x, x,
out, out,
x.grad_name, x.grad_name,
out1,
x1.grad_name,
], ],
) )
self.assertEqual(res[1].shape, ()) self.assertEqual(res[1].shape, ())
...@@ -2221,6 +2338,67 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2221,6 +2338,67 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(res[2].shape, ()) self.assertEqual(res[2].shape, ())
np.testing.assert_allclose(res[2], 1.0) np.testing.assert_allclose(res[2], 1.0)
self.assertEqual(res[3].shape, ())
self.assertEqual(res[4].shape, (3, 5))
@prog_scope()
def test_kthvalue(self):
# 1) x is 0D
x = paddle.rand([])
x.stop_gradient = False
out, index = paddle.kthvalue(x, 1)
paddle.static.append_backward(out)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[x, out, index, x.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertTrue(res[1] == res[0])
self.assertEqual(res[2].shape, ())
self.assertTrue(res[2] == 0)
self.assertEqual(res[3].shape, ())
self.assertTrue(res[3] == 1.0)
# 2) x is 1D
x1 = paddle.rand([5])
x1.stop_gradient = False
out1, index1 = paddle.kthvalue(x1, 1)
paddle.static.append_backward(out1)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out1, index1, x1.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, (5,))
@prog_scope()
def test_mode(self):
# 1) x is 0D
x = paddle.rand([])
x.stop_gradient = False
out, index = paddle.mode(x)
paddle.static.append_backward(out)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, index, x.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, ())
self.assertTrue(res[2] == 1.0)
# 2) x is 1D
x1 = paddle.rand([5])
x1.stop_gradient = False
out1, index1 = paddle.mode(x1)
paddle.static.append_backward(out1)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out1, index1, x1.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, (5,))
@prog_scope() @prog_scope()
def test_std(self): def test_std(self):
x = paddle.rand([]) x = paddle.rand([])
...@@ -2314,7 +2492,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2314,7 +2492,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.flip(x, axis=[]) out = paddle.flip(x, axis=[])
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -2340,7 +2518,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2340,7 +2518,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.pow(x, 2.0) out = paddle.pow(x, 2.0)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -2356,7 +2534,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2356,7 +2534,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.full([], 1.0, 'float32') x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False x.stop_gradient = False
out = paddle.cast(x, 'int32') out = paddle.cast(x, 'int32')
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -2372,7 +2550,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2372,7 +2550,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.full([], 1.0, 'float32') x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False x.stop_gradient = False
out = paddle.cumprod(x, 0) out = paddle.cumprod(x, 0)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name])
...@@ -2388,7 +2566,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2388,7 +2566,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.uniform([], None, -10, 10) x = paddle.uniform([], None, -10, 10)
x.stop_gradient = False x.stop_gradient = False
out = paddle.clip(x, -5, 5) out = paddle.clip(x, -5, 5)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -2404,7 +2582,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2404,7 +2582,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.increment(x, 1.0) out = paddle.increment(x, 1.0)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -2454,7 +2632,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2454,7 +2632,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.transpose(x, []) out = paddle.transpose(x, [])
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name])
...@@ -2472,7 +2650,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2472,7 +2650,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.moveaxis(x, [], []) out = paddle.moveaxis(x, [], [])
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name])
...@@ -2619,34 +2797,6 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2619,34 +2797,6 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(res[1].shape, (5,)) self.assertEqual(res[1].shape, (5,))
self.assertEqual(res[2].shape, ()) self.assertEqual(res[2].shape, ())
@prog_scope()
def test_kthvalue(self):
x = paddle.full([], 1, 'float32')
x.stop_gradient = False
out, index = paddle.kthvalue(x, 1)
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, index, x.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, ())
self.assertTrue(res[2] == 1.0)
@prog_scope()
def test_mode(self):
x = paddle.full([], 1, 'float32')
x.stop_gradient = False
out, index = paddle.mode(x)
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, index, x.grad_name])
self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, ())
self.assertTrue(res[2] == 1.0)
@prog_scope() @prog_scope()
def test_flatten(self): def test_flatten(self):
x = paddle.full([], 1, 'float32') x = paddle.full([], 1, 'float32')
...@@ -2682,7 +2832,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2682,7 +2832,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
out = paddle.scale(x, scale=2.0, bias=1.0) out = paddle.scale(x, scale=2.0, bias=1.0)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name])
...@@ -2908,7 +3058,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2908,7 +3058,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x.stop_gradient = False x.stop_gradient = False
out = paddle.reverse(x, axis=[]) out = paddle.reverse(x, axis=[])
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
...@@ -3033,6 +3183,50 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -3033,6 +3183,50 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(res[4].shape, (2,)) self.assertEqual(res[4].shape, (2,))
self.assertEqual(res[5].shape, (3,)) self.assertEqual(res[5].shape, (3,))
@prog_scope()
def test_allclose(self):
# 1) x is 0D
x = paddle.full([], 0.5)
y = paddle.full([], 0.6)
out = paddle.allclose(x, y)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out])
self.assertEqual(res[0].shape, ())
self.assertFalse(res[0])
# 2) x is ND
x = paddle.full([2, 3], 0.5)
y = paddle.full([2, 3], 0.6)
out = paddle.allclose(x, y)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out])
self.assertEqual(res[0].shape, ())
self.assertFalse(res[0])
@prog_scope()
def test_equal_all(self):
# 1) x is 0D
x = paddle.full([], 0.5)
y = paddle.full([], 0.6)
out = paddle.equal_all(x, y)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out])
self.assertEqual(res[0].shape, ())
self.assertFalse(res[0])
# 2) x is ND
x = paddle.full([2, 3], 0.5)
y = paddle.full([2, 3], 0.6)
out = paddle.equal_all(x, y)
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out])
self.assertEqual(res[0].shape, ())
self.assertFalse(res[0])
@prog_scope() @prog_scope()
def test_where(self): def test_where(self):
x1 = paddle.full([], 1, 'float32') x1 = paddle.full([], 1, 'float32')
...@@ -3178,7 +3372,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -3178,7 +3372,7 @@ class TestSundryAPIStatic(unittest.TestCase):
x = paddle.full([], 2.0) x = paddle.full([], 2.0)
x.stop_gradient = False x.stop_gradient = False
out = paddle.t(x) out = paddle.t(x)
paddle.static.append_backward(out.sum()) paddle.static.append_backward(out)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
prog, feed={}, fetch_list=[out, out.grad_name, x.grad_name] prog, feed={}, fetch_list=[out, out.grad_name, x.grad_name]
......
...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer): ...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer):
def forward(self, x): def forward(self, x):
is_use = ( is_use = (
paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).numpy()[0] paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).item()
and self.trainer_id == 1 and self.trainer_id == 1
) )
......
...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer): ...@@ -52,7 +52,7 @@ class SimpleNet(paddle.nn.Layer):
def forward(self, x): def forward(self, x):
is_use = ( is_use = (
paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).numpy()[0] paddle.equal_all(x, paddle.ones(shape=(batch, in_dim))).item()
and self.trainer_id == 1 and self.trainer_id == 1
) )
......
...@@ -2324,9 +2324,9 @@ class Model: ...@@ -2324,9 +2324,9 @@ class Model:
outs = getattr(self, mode + '_batch')(*_inputs) outs = getattr(self, mode + '_batch')(*_inputs)
if self._metrics and self._loss: if self._metrics and self._loss:
metrics = [[l[0] for l in outs[0]]] metrics = [[float(l) for l in outs[0]]]
elif self._loss: elif self._loss:
metrics = [[l[0] for l in outs]] metrics = [[float(l) for l in outs]]
else: else:
metrics = [] metrics = []
......
...@@ -201,10 +201,10 @@ class ProgressBar: ...@@ -201,10 +201,10 @@ class ProgressBar:
and v.size == 1 and v.size == 1
and v.dtype in [np.float32, np.float64] and v.dtype in [np.float32, np.float64]
): ):
if abs(v[0]) > 1e-3: if abs(v.item()) > 1e-3:
info += ' %.4f' % v[0] info += ' %.4f' % v.item()
else: else:
info += ' %.4e' % v[0] info += ' %.4e' % v.item()
else: else:
info += ' %s' % v info += ' %s' % v
......
...@@ -90,7 +90,7 @@ def train_lenet(lenet, reader, optimizer): ...@@ -90,7 +90,7 @@ def train_lenet(lenet, reader, optimizer):
if batch_id % 100 == 0: if batch_id % 100 == 0:
loss_list.append(float(avg_loss)) loss_list.append(float(avg_loss))
_logger.info('{}: {}'.format('loss', avg_loss.numpy())) _logger.info('{}: {}'.format('loss', float(avg_loss)))
return loss_list return loss_list
......
...@@ -412,11 +412,19 @@ def median(x, axis=None, keepdim=False, name=None): ...@@ -412,11 +412,19 @@ def median(x, axis=None, keepdim=False, name=None):
if x.size == 0: if x.size == 0:
raise ValueError("In median, the size of input x should not be 0.") raise ValueError("In median, the size of input x should not be 0.")
if len(x.shape) == 0: is_flatten = False
return x.clone()
is_flatten = axis is None
dims = len(x.shape) dims = len(x.shape)
if dims == 0:
assert axis in [
-1,
0,
None,
], 'when input 0D, axis can only be [-1, 0] or default None'
is_flatten = True
if axis is None:
is_flatten = True
if is_flatten: if is_flatten:
x = paddle.flatten(x) x = paddle.flatten(x)
axis = 0 axis = 0
...@@ -446,16 +454,14 @@ def median(x, axis=None, keepdim=False, name=None): ...@@ -446,16 +454,14 @@ def median(x, axis=None, keepdim=False, name=None):
out_tensor = out_tensor + paddle.sum( out_tensor = out_tensor + paddle.sum(
paddle.cast(paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True paddle.cast(paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True
) )
if not keepdim or is_flatten: if is_flatten:
if not is_flatten: if keepdim:
newshape = x.shape[:axis] + x.shape[axis + 1 :] out_tensor = out_tensor.reshape([1] * dims)
elif not keepdim:
newshape = [1]
else: else:
newshape = [1] * dims out_tensor = out_tensor.reshape([])
else: else:
newshape = out_tensor.shape if not keepdim:
out_tensor = out_tensor.reshape(newshape, name=name) out_tensor = out_tensor.squeeze(axis)
return out_tensor return out_tensor
......
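
For reference, a small usage sketch of the reshape logic in the median() change above (result shapes mirror the tests added in this commit; the snippet is illustrative, not part of the diff):

    import paddle

    # 0D input: axis may only be 0, -1, or None; the result stays 0D
    x0 = paddle.rand([])
    print(paddle.median(x0).shape)                        # []

    # ND input, axis=None: the flattened reduction gives a 0D result,
    # or [1] * ndim when keepdim=True
    x = paddle.rand([3, 5])
    print(paddle.median(x).shape)                         # []
    print(paddle.median(x, keepdim=True).shape)           # [1, 1]

    # With an explicit axis, the reduced axis is squeezed unless keepdim=True
    print(paddle.median(x, axis=1).shape)                 # [3]
    print(paddle.median(x, axis=1, keepdim=True).shape)   # [3, 1]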