[Zero-Size]support zero-size tensor for detach/numpy/reshape (#50389)

af23efe0 · zhouweiwei2014 · GitHub · e89baf91 · af23efe0 · af23efe0
7 changed files
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -732,7 +732,7 @@ static PyObject* tensor_method_detach(TensorObject* self,
                                      PyObject* kwargs) {
  EAGER_TRY
  PADDLE_ENFORCE_EQ(
-      self->tensor.initialized(),
+      self->tensor.defined(),
      true,
      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
                                        self->tensor.name()));

--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -1641,19 +1641,13 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
                               const phi::DDim& in_dims) {
  const int64_t in_size = phi::product(in_dims);
  auto in_dims_vec = phi::vectorize(in_dims);
-  bool all_positive = std::all_of(in_dims_vec.cbegin(),
-                                  in_dims_vec.cend(),
-                                  [](int64_t i) { return i > 0; });
-  // only one dimension can be set to -1, whose size will be automatically
-  // infered.
-  const int64_t unk_dim_val = -1;
-  const int64_t copy_dim_val = 0;
-
  std::vector<int64_t> output_shape(shape.size(), 0);
  int64_t capacity = 1;
  int unk_dim_idx = -1;
+
  for (size_t i = 0; i < shape.size(); ++i) {
-    if (shape[i] == unk_dim_val) {
+    if (shape[i] == -1) {
+      // only one dimension can be set to -1, whose size will be inferred.
      PADDLE_ENFORCE_EQ(
          unk_dim_idx,
          -1,
@@ -1663,19 +1657,27 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
              phi::make_ddim(shape),
              i));
      unk_dim_idx = i;
-    } else if (shape[i] == copy_dim_val) {
-      PADDLE_ENFORCE_LT(
-          static_cast<int>(i),
-          in_dims.size(),
-          phi::errors::InvalidArgument(
-              "The index of 0 in `shape` must be less than "
-              "the input tensor X's dimensions. "
-              "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
-              "X's dimensions = %d.",
-              phi::make_ddim(shape),
-              i,
-              in_dims,
-              in_dims.size()));
+    } else if (shape[i] == 0) {
+      // For a 0-Size Tensor, a 0 in `shape` is a literal zero-length dim.
+      // Otherwise, 0 means "copy the input's dim at the same index".
+      if (in_size > 0) {
+        PADDLE_ENFORCE_LT(
+            static_cast<int>(i),
+            in_dims.size(),
+            phi::errors::InvalidArgument(
+                "The index of 0 in `shape` must be less than "
+                "the input tensor X's dimensions. "
+                "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
+                "X's dimensions = %d.",
+                phi::make_ddim(shape),
+                i,
+                in_dims,
+                in_dims.size()));
+        output_shape[i] = in_dims[i];
+      } else {
+        output_shape[i] = shape[i];
+      }
+      capacity *= output_shape[i];
    } else {
      PADDLE_ENFORCE_GT(
          shape[i],
@@ -1687,24 +1689,36 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
              phi::make_ddim(shape),
              i,
              shape[i]));
+      output_shape[i] = shape[i];
+      capacity *= output_shape[i];
    }
+  }

-    // NOTE all non-zero values will be converted to True (include negative
-    // value)
-    capacity *= (shape[i] ? shape[i] : in_dims[i]);
-    output_shape[i] = (shape[i] ? static_cast<int64_t>(shape[i]) : in_dims[i]);
+  if (capacity == 0) {
+    PADDLE_ENFORCE_EQ(in_size,
+                      0,
+                      phi::errors::InvalidArgument(
+                          "Only Zero-Size Tensor'shape can contain 0"));
+    PADDLE_ENFORCE_EQ(unk_dim_idx,
+                      -1,
+                      phi::errors::InvalidArgument(
+                          "can not rehsape %s to %s, because the unspecified "
+                          "dimension %i can be any number and is ambiguous",
+                          in_dims,
+                          phi::make_ddim(shape),
+                          unk_dim_idx));
  }

+  bool no_negative = std::all_of(in_dims_vec.cbegin(),
+                                 in_dims_vec.cend(),
+                                 [](int64_t i) { return i >= 0; });
  if (unk_dim_idx != -1) {
-    if (all_positive) {
-      // in_size < 0 and is un-determinate in compile time, skip the check,
-      // for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8],
-      // capacity = -24, in_size = -8, output_shape[0] = 0
-      // the following check will fail.
-      output_shape[unk_dim_idx] = -in_size / capacity;
+    // at compile time in_dims may contain -1, so no_negative may be false.
+    if (no_negative) {
+      output_shape[unk_dim_idx] = in_size / capacity;
      PADDLE_ENFORCE_EQ(
          output_shape[unk_dim_idx] * capacity,
-          -in_size,
+          in_size,
          phi::errors::InvalidArgument(
              "The 'shape' attribute in ReshapeOp is invalid. "
              "The input tensor X'size must be divisible by known "
@@ -1716,10 +1730,11 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
              phi::make_ddim(shape),
              capacity));
    } else {
+      // such as [-1, 8, 3]->[-1, 8], out_shape will remain [-1, 8]
      output_shape[unk_dim_idx] = -1;
    }
  } else {
-    if (all_positive) {
+    if (no_negative) {
      PADDLE_ENFORCE_EQ(
          capacity,
          in_size,
@@ -1736,24 +1751,6 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
    }
  }

-  // support reshape with zero-input(input tensor with product(shape) == 0)
-  // by now we require that if the input tensor is zero shape, the target
-  // shape of output must be zero
-  if (in_size == 0) {
-    PADDLE_ENFORCE_LE(
-        capacity,
-        in_size,
-        phi::errors::InvalidArgument(
-            "The 'shape' in ReshapeOp is invalid. "
-            "The input tensor X's shape = [%s], X's capacity = %d."
-            "But the target shape of Out is [%s],  the "
-            "capacity of 'Out' is %d.",
-            in_dims,
-            in_size,
-            phi::make_ddim(shape),
-            capacity));
-  }
-
  return phi::make_ddim(output_shape);
 }

@@ -1765,7 +1762,7 @@ void InferMetaFromVecValue(const MetaTensor& x,
  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
  out->set_layout(x.layout());
-  if (x_dims[0] == out_dims[0]) {
+  if (x_dims.size() > 0 && (x_dims[0] == out_dims[0])) {
    // Only pass LoD when the first dimension of output and Input(X)
    // are the same.
    out->share_lod(x);

--- a/paddle/phi/kernels/reshape_kernel.cc
+++ b/paddle/phi/kernels/reshape_kernel.cc
@@ -32,6 +32,10 @@ void ReshapeInferKernel(const Context& dev_ctx,
                        DenseTensor* out) {
  MetaTensor meta_out(out);
  InferMetaFromVecValue(x, shape.GetData(), &meta_out);
+  // Zero-Size Tensor
+  if (x.numel() == 0) {
+    return;
+  }
  if (x.initialized() && x.Holder() == out->Holder()) {
    dev_ctx.Alloc(out, x.dtype());
    return;

--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -385,7 +385,11 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):


 def _create_loss_op_desc_(loss):
-    create_shape = [] if len(loss.shape) == 0 else [1]
+    # 0D Tensor or 0-Size Tensor
+    if len(loss.shape) == 0 or 0 in loss.shape:
+        create_shape = loss.shape
+    else:
+        create_shape = [1]
    op_desc = _create_op_desc_(
        "fill_constant",
        {},

--- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# Note:
+# 0D Tensor indicates that the tensor's dimension is 0
+# 0D Tensor's shape is always [], numel is 1
+# which can be created by paddle.rand([])
+
 import unittest

 import numpy as np

--- a/python/paddle/fluid/tests/unittests/test_zero_size_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_zero_size_tensor.py
+#   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Note:
+# 0-Size Tensor indicates that the tensor's shape contains 0
+# A 0-Size Tensor's shape can be [2, 0, 3], [0, 2], etc.; its numel is 0
+# which can be created by paddle.rand([2, 0, 3])
+
+import unittest
+
+import paddle
+
+
+# Used to test zero-size tensor support in sundry APIs that are unique and
+# cannot be grouped with others. Such tests can be added here flexibly.
+class TestSundryAPI(unittest.TestCase):
+    """Zero-size tensor checks for APIs that do not share a common
+    test template (detach, numpy, reshape)."""
+
+    def test_detach(self):
+        # detach() on a tensor whose shape contains 0 keeps the shape.
+        x = paddle.rand([0, 2])
+        out = x.detach()
+
+        self.assertEqual(out.shape, [0, 2])
+        self.assertEqual(out.size, 0)
+
+    def test_numpy(self):
+        # numpy() on a zero-size tensor yields an empty array of same shape.
+        x = paddle.rand([0, 2])
+        out = x.numpy()
+
+        self.assertEqual(out.shape, (0, 2))
+        self.assertEqual(out.size, 0)
+
+    def test_reshape(self):
+        # case 1: a single -1 is inferred as 0 for a zero-size input
+        x1 = paddle.rand([0, 2])
+        x1.stop_gradient = False
+        out1 = paddle.reshape(x1, [-1])
+
+        self.assertEqual(out1.shape, [0])
+        self.assertEqual(out1.size, 0)
+
+        # case 2: -1 next to a non-zero known dim is inferred as 0
+        x2 = paddle.rand([0, 2])
+        x2.stop_gradient = False
+        out2 = paddle.reshape(x2, [2, -1])
+
+        self.assertEqual(out2.shape, [2, 0])
+        self.assertEqual(out2.size, 0)
+
+        # case 3: explicit target shape that contains 0
+        x3 = paddle.rand([0, 2])
+        x3.stop_gradient = False
+        out3 = paddle.reshape(x3, [2, 3, 0])
+
+        self.assertEqual(out3.shape, [2, 3, 0])
+        self.assertEqual(out3.size, 0)
+
+        # case 4: for a zero-size input, 0 in the target shape stays 0
+        x4 = paddle.rand([0, 2])
+        x4.stop_gradient = False
+        out4 = paddle.reshape(x4, [0])
+
+        self.assertEqual(out4.shape, [0])
+        self.assertEqual(out4.size, 0)
+
+        # case 5: the target shape mixes 0 and -1; this is expected to be
+        # rejected with ValueError, so the result is not bound to a name.
+        x5 = paddle.rand([0])
+        with self.assertRaises(ValueError):
+            paddle.reshape(x5, [2, 0, -1])
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -3485,21 +3485,19 @@ def reshape(x, shape, name=None):
            # the value is [10.]

    """
-    actual_shape = None
-
    if in_dygraph_mode():
-        tmp_tensor_type = core.eager.Tensor
        if isinstance(shape, (list, tuple)):
-            shape = [
-                item.numpy().item(0)
-                if isinstance(item, tmp_tensor_type)
-                else item
-                for item in shape
-            ]
-            if shape == x.shape:
+            new_shape = []
+            for ele in shape:
+                if isinstance(ele, core.eager.Tensor):
+                    new_shape.append(ele.item())
+                else:
+                    new_shape.append(ele)
+
+            if new_shape == x.shape:
                out = x
            else:
-                out = _C_ops.reshape(x, shape)
+                out = _C_ops.reshape(x, new_shape)
        elif isinstance(shape, core.eager.Tensor):
            shape.stop_gradient = True
            out = _C_ops.reshape(x, shape)
@@ -3527,11 +3525,6 @@ def reshape(x, shape, name=None):
            'reshape',
        )
        check_type(shape, 'shape', (list, tuple, Variable), 'reshape')
-        check_type(
-            actual_shape, 'actual_shape', (Variable, type(None)), 'reshape'
-        )
-
-        helper = LayerHelper("reshape2", **locals())

        def get_attr_shape(list_shape):
            unk_dim_idx = -1
@@ -3579,10 +3572,8 @@ def reshape(x, shape, name=None):
            attrs["shape"] = get_attr_shape(shape)
            if utils._contain_var(shape):
                inputs['ShapeTensor'] = utils._convert_to_tensor_list(shape)
-            elif isinstance(actual_shape, Variable):
-                actual_shape.stop_gradient = True
-                inputs["Shape"] = actual_shape

+        helper = LayerHelper("reshape2", **locals())
        out = helper.create_variable_for_type_inference(dtype=x.dtype)
        x_shape = helper.create_variable_for_type_inference(dtype=x.dtype)
        helper.append_op(