From 767e7b3f0b7a972f21da987d2405b3ff685afb23 Mon Sep 17 00:00:00 2001 From: JYChen Date: Tue, 9 May 2023 10:40:37 +0800 Subject: [PATCH] [Cherry-pick] zero-dim: support 0-D for getitem/setitem (#53441) * support 0-D output and 0-D as indice in __getitem__ * fix tests * fix inference and UT * add unittest for setitem * fix xpu test * fix xpu 0-d * fix right value is 0d and index is List/Tensor * Hack__getitem__ from 0-d to 1-d with FLAGS_set_to_1d * change PHI_DECLARE_xxx to DECLARE_xxx since the change not merged to 2.5 * hack 1-D tensor to Scalar * throw warning at __getitem__, not slice_utils --- paddle/fluid/framework/attribute_checker.h | 23 +- paddle/fluid/pybind/eager_method.cc | 72 ++--- paddle/fluid/pybind/imperative.cc | 86 +++--- paddle/phi/infermeta/unary.cc | 3 - paddle/phi/kernels/funcs/slice_utils.h | 13 +- .../phi/kernels/xpu/set_value_grad_kernel.cc | 5 + .../auto_parallel/operators/dist_slice.py | 20 +- python/paddle/fft.py | 4 +- .../unittests/test_imperative_numpy_bridge.py | 2 +- .../fluid/tests/unittests/test_kthvalue_op.py | 8 +- .../tests/unittests/test_set_value_op.py | 2 +- .../fluid/tests/unittests/test_slice_op.py | 4 +- .../fluid/tests/unittests/test_var_base.py | 11 +- .../fluid/tests/unittests/test_variable.py | 3 +- .../fluid/tests/unittests/test_while_op.py | 6 +- .../tests/unittests/test_zero_dim_tensor.py | 246 ++++++++++++++++++ python/paddle/fluid/variable_index.py | 37 +-- .../incubate/optimizer/functional/lbfgs.py | 8 +- .../jit/dy2static/variable_trans_func.py | 6 +- python/paddle/nn/layer/rnn.py | 2 +- python/paddle/tensor/manipulation.py | 18 +- test/dygraph_to_static/test_list.py | 2 +- test/xpu/test_set_value_op_xpu.py | 2 +- test/xpu/test_slice_op_xpu.py | 4 +- test/xpu/test_zero_dim_tensor_xpu.py | 134 ++++++++++ 25 files changed, 573 insertions(+), 148 deletions(-) diff --git a/paddle/fluid/framework/attribute_checker.h b/paddle/fluid/framework/attribute_checker.h index 67eb69efdf3..2e5e7bf8939 100644 --- a/paddle/fluid/framework/attribute_checker.h +++ b/paddle/fluid/framework/attribute_checker.h @@ -73,10 +73,10 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); @@ -105,20 +105,21 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); - PADDLE_ENFORCE_EQ(shape[0] == 1U || shape[0] == -1, - true, - platform::errors::InvalidArgument( - "Required shape[0] of Attribute(%s) == 1 or -1, " - "but received shape[0] == %s", - var_desc->Name(), - shape[0])); + PADDLE_ENFORCE_EQ( + shape.size() == 0U || shape[0] == 1U || shape[0] == -1, + true, + platform::errors::InvalidArgument( + "Required shape is (), or shape[0] of Attribute(%s) == 1 or -1, " + "but received shape[0] == %s", + var_desc->Name(), + shape[0])); } } }; diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc 
index 2455eed34fe..0092bccba70 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -135,17 +135,18 @@ static PyObject* tensor_method_numpy(TensorObject* self, } } if (set_to_1d) { - // 0D Tensor hack process to 1D numpy, will remove in future + // 0D Tensor hack process to 1D numpy, will remove in release 2.6 VLOG(0) << "Warning:: 0D Tensor cannot be used as 'Tensor.numpy()[0]' . In " "order to avoid this problem, " "0D Tensor will be changed to 1D numpy currently, but it's not " "correct and will be " - "removed in future. For Tensor contain only one element, Please " + "removed in release 2.6. For Tensor contain only one element, " + "Please " "modify " " 'Tensor.numpy()[0]' to 'float(Tensor)' as soon as " "possible, " - "otherwise 'Tensor.numpy()[0]' will raise error in future."; + "otherwise 'Tensor.numpy()[0]' will raise error in release 2.6."; py_rank = 1; py_dims[0] = 1; py_strides[0] = sizeof_dtype * numel; @@ -922,39 +923,50 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, } } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + // with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + // otherwise the output shape will be not correct. if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) + << "Warning: In Tensor '__getitem__', if the number of scalar " + "elements " + "in the index is equal to the rank of the Tensor, the output " + "should " + "be 0-D. In order to be consistent with the behavior of previous " + "versions, it will be processed to 1-D. But it is not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). "; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - paddle::Tensor new_out; - { - eager_gil_scoped_release guard; - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto& axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + paddle::Tensor new_out; + { + eager_gil_scoped_release guard; + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto& axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - new_out = unsqueeze_ad_func(out, none_axes); + axis -= len; } - return ToPyObject(new_out); + new_out = unsqueeze_ad_func(out, none_axes); } + return ToPyObject(new_out); } // the index is a list diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 1be8371ad4f..1440918eb48 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -63,6 +63,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/type_defs.h" +DECLARE_bool(set_to_1d); namespace paddle { namespace pybind { @@ -1067,46 +1068,63 @@ void BindImperative(py::module *m_ptr) { } tracer->TraceOp(op_type, ins, outs, std::move(attrs)); } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output + // will be 1-D with FLAGS_set_to_1d=True. In this case, one + // `None` should be pop out, otherwise the output shape will be + // not correct. if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) << "Warning: In Tensor '__getitem__', if the number " + "of scalar " + "elements " + "in the index is equal to the rank of the Tensor, " + "the output " + "should " + "be 0-D. In order to be consistent with the " + "behavior of previous " + "versions, it will be processed to 1-D. But it is " + "not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index " + "element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). "; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto &axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto &axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - - imperative::NameVarBaseMap ins = {{"X", {out}}}; - framework::AttributeMap attrs = {{"axes", none_axes}}; - auto new_out = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - auto out_xshape = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - imperative::NameVarBaseMap outs = {{"Out", {new_out}}, - {"XShape", {out_xshape}}}; - tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); - - return new_out; + axis -= len; } + + imperative::NameVarBaseMap ins = {{"X", {out}}}; + framework::AttributeMap attrs = {{"axes", none_axes}}; + auto new_out = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto out_xshape = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + imperative::NameVarBaseMap outs = {{"Out", {new_out}}, + {"XShape", {out_xshape}}}; + tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); + + return new_out; } // the index is a list diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 14ae02246ba..dbb19380907 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3918,9 +3918,6 @@ void StridedSliceRawInferMeta(const MetaTensor& x, new_out_shape.push_back(out_dims[i]); } } - if (new_out_shape.size() == 0) { - new_out_shape.push_back(1); - } out_dims = 
phi::make_ddim(new_out_shape); } VLOG(4) << "out_dims: " << out_dims; diff --git a/paddle/phi/kernels/funcs/slice_utils.h b/paddle/phi/kernels/funcs/slice_utils.h index a56a5e16f65..78fa4c4ba13 100644 --- a/paddle/phi/kernels/funcs/slice_utils.h +++ b/paddle/phi/kernels/funcs/slice_utils.h @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include - #include #include +#include "paddle/phi/core/flags.h" + +DECLARE_bool(set_to_1d); namespace phi { @@ -202,13 +205,11 @@ inline DDim GetDecreasedDims(const DDim slice_dims, new_shape.push_back(decreased_dims[i]); } } - - // NOTE(liym27): Paddle does not support that the rank of Tensor is 0, and - // uses [1] instead. - if (new_shape.size() == 0) { + if (FLAGS_set_to_1d && new_shape.size() == 0) { + // NOTE(zoooo0820): Hack procssing to 1-D, when axes decrease to 0-D in + // slice. This will remove in release 2.6. new_shape.push_back(1); } - decreased_dims = phi::make_ddim(new_shape); } return decreased_dims; diff --git a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc index d7e1ed8114e..d80a2a97da8 100644 --- a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc @@ -266,6 +266,11 @@ void SetValueGradImpl(const Context& dev_ctx, {fake_value_grad_dims.Get(), fake_value_grad_dims.size()}, static_cast(0)); auto value_grad_dims_vec = phi::vectorize(value_grad_dims); + // for value is a 0-D Tensor + if (value_grad_dims.size() == 0) { + value_grad_dims_vec = + phi::vectorize(phi::make_ddim(std::vector({1}))); + } for (auto offset : offsets) { for (int i = 0; i < out_dims_size; i++) { slice_end[i] = offset[i] + fake_value_grad_dims[i]; diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index 17e68002fa4..0110f54d481 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle + from ..utils import compute_compatible_dim_mapping, is_dim_shard from .common import ( DistributedOperatorImpl, @@ -70,9 +72,14 @@ class DistributedSliceImpl(DistributedOperatorImpl): if i not in decrease_axis: ref_indices.append(i) if ref_indices == []: - assert len(out_dims_mapping) == 1 - if is_dim_shard(out_dims_mapping[0]): - return False + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + assert len(out_dims_mapping) == 1 + if is_dim_shard(out_dims_mapping[0]): + return False + else: + assert len(out_dims_mapping) == 0 else: for i in range(len(out_dims_mapping)): ref_index = ref_indices[i] @@ -142,9 +149,12 @@ class DistributedSliceImpl(DistributedOperatorImpl): ref_indices.append(i) if ref_dims_mapping == []: - ref_dims_mapping = [-1] + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. 
+ if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + ref_dims_mapping = [-1] + assert ref_dims_mapping[0] == out_dims_mapping[0] assert len(ref_dims_mapping) == len(out_dims_mapping) - assert ref_dims_mapping[0] == out_dims_mapping[0] changed = False else: assert len(ref_dims_mapping) == len(out_dims_mapping) diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 1ce18f120c1..438c65ae2f0 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -1371,7 +1371,7 @@ def fftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = shape[axes] // 2 else: - shifts = paddle.concat([shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) @@ -1416,7 +1416,7 @@ def ifftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = -shape[axes] // 2 else: - shifts = paddle.concat([-shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([-shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py index effcfece0f5..58059a29553 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py @@ -43,7 +43,7 @@ class TestImperativeNumpyBridge(unittest.TestCase): np.testing.assert_array_equal(var2.numpy(), data_np) data_np[0][0] = -1 self.assertEqual(data_np[0][0], -1) - self.assertNotEqual(var2[0][0].numpy()[0], -1) + self.assertNotEqual(var2[0][0].numpy(), -1) self.assertFalse(np.array_equal(var2.numpy(), data_np)) diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py index 66389a870e4..0bf3d8e9480 100644 --- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py +++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py @@ -140,16 +140,16 @@ class TestKthvalueOpWithNaN(unittest.TestCase): nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) def test_nan_in_gpu_kernel(): paddle.set_device('gpu') nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) test_nan_in_cpu_kernel() if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 9c5a71df018..66d4b8f05b7 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -1590,7 +1590,7 @@ class TestSetValueInplace(unittest.TestCase): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 0314a37170d..f43bd4b140d 100644 --- 
a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -541,8 +541,8 @@ class TestSliceAPI(unittest.TestCase): def test_1(self): with paddle_static_guard(): input = np.random.random([3, 4, 5, 6]).astype("float64") - minus_1 = paddle.tensor.fill_constant([1], "int32", -1) - minus_3 = paddle.tensor.fill_constant([1], "int64", -3) + minus_1 = paddle.tensor.fill_constant([], "int32", -1) + minus_3 = paddle.tensor.fill_constant([], "int64", -3) starts = paddle.static.data( name='starts', shape=[1, 3], dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 24920eb375c..c9607f89197 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -604,8 +604,7 @@ class TestVarBase(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), tuple(nw.shape)) @@ -705,10 +704,10 @@ class TestVarBase(unittest.TestCase): var = paddle.to_tensor(tensor_array) - one = paddle.ones(shape=[1], dtype="int32") - two = paddle.full(shape=[1], fill_value=2, dtype="int32") - negative_one = paddle.full(shape=[1], fill_value=-1, dtype="int32") - four = paddle.full(shape=[1], fill_value=4, dtype="int32") + one = paddle.ones(shape=[], dtype="int32") + two = paddle.full(shape=[], fill_value=2, dtype="int32") + negative_one = paddle.full(shape=[], fill_value=-1, dtype="int32") + four = paddle.full(shape=[], fill_value=4, dtype="int32") var = fluid.dygraph.to_variable(tensor_array) var1 = var[0, one, one] diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index b709510371e..6d5bd96f9ac 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -132,8 +132,7 @@ class TestVariable(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), nw.shape) diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index ea6d2d49453..8ae9fa8c5c2 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -192,9 +192,9 @@ class TestOutputsMustExistsInputs(unittest.TestCase): with fluid.program_guard(main_program, startup_program): def func(x): - s = paddle.zeros([1]) - i = paddle.ones([1]) - max_len = paddle.shape(x)[0] + s = paddle.zeros([]) + i = paddle.ones([]) + max_len = paddle.shape(x) def cond(i, s, x): return i < max_len diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index f3bea4cf246..8d824a68298 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -666,6 +666,140 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(zero_dim_var.shape, []) self.assertEqual(zero_dim_var.item(), 0.5) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + 
out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32') @@ -2456,6 +2590,118 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[0].shape, ()) self.assertEqual(res[0], 0.5) + @prog_scope() + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) + + self.assertEqual(res[0].shape, ()) + np.testing.assert_allclose(res[0], np.array(119)) + self.assertEqual(res[2].shape, ()) + np.testing.assert_allclose(res[2], 1.0) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. 
+ x2 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x2[1, 2] + out2 = x2[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + res = self.exe.run(prog, fetch_list=[out1, out2]) + np.testing.assert_allclose(res[0], res[1]) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x3 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out3 = x3[1, 2, None, 3, 4] + out4 = x3[1, None, 2, None, 3, 4] + res = self.exe.run(prog, fetch_list=[out3, out4]) + self.assertEqual(res[0].shape, (1,)) + np.testing.assert_allclose(res[0], np.array([119])) + self.assertEqual(res[1].shape, (1, 1)) + np.testing.assert_allclose(res[1], np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x4 = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out5 = x4[indice] + out6 = x4[indice, indice] + res = self.exe.run(prog, fetch_list=[out5, out6]) + + self.assertEqual(res[0].shape, (1, 3, 4)) + np.testing.assert_allclose(res[0], np.ones((1, 3, 4))) + self.assertEqual(res[1].shape, (1, 4)) + np.testing.assert_allclose(res[1], np.ones((1, 4))) + + @prog_scope() + def test_setitem(self): + # NOTE(zoooo0820): __setitem__ has gradient problem in static graph. + # To solve this, we may not support __setitem__ in static graph. + # These unit tests will delete soon. + + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 2, 3, 4], np.array(10)) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, v.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + @prog_scope() def test_expand(self): x = paddle.full([], 1, 'float32') diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 0d866860b31..451e8c3bf98 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -17,6 +17,7 @@ import numpy as np from . import unique_name from . import core import paddle +import warnings MAX_INTEGER = 2**31 - 1 @@ -185,7 +186,8 @@ class SliceInfo: for i in range(len(gather_tensor_shape)): if not ( - value_dims_bd[i] == gather_tensor_shape[i] + len(value_dims_bd) == 0 + or value_dims_bd[i] == gather_tensor_shape[i] or value_dims_bd[i] == 1 ): raise ValueError( @@ -282,7 +284,16 @@ def is_integer_or_scalar_tensor(ele): if isinstance(ele, int): return True elif isinstance(ele, Variable): - if len(ele.shape) == 1 and ele.shape[0] == 1: + # NOTE(zoooo0820): For compatibility, if FLAGS_set_to_1d is set to True, + # 1-D tensor is still treated as a scalar, which means basic indexing. + # This will be removed in future. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + if len(ele.shape) == 1 and ele.shape[0] == 1: + warnings.warn( + "1-D Tensor will be treat as advanced indexing in future version. Currently, 1-D Tensor means a scalar, not vector, and please modify it to 0-D Tensor. If advanced indexing is needed, please use `export FLAGS_set_to_1d=False` to set the flag." + ) + return True + if len(ele.shape) == 0: return True return False @@ -573,13 +584,14 @@ def _getitem_impl_(var, item): out = reverse(out, axis=reverse_axes) - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - if len(decrease_axes) == len(var.shape): + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + # otherwise the output shape will be not correct. 
+ set_to_1d = paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d'] + if set_to_1d and len(decrease_axes) == len(var.shape): + warnings.warn( + "Warning: In Tensor '__getitem__', if the number of scalar elements in the index is equal to the rank of the Tensor, the output should be 0-D. In order to be consistent with the behavior of previous versions, it will be processed to 1-D. But it is not correct and will be removed in release 2.6. If 1-D is still wanted, please modify the index element from scalar to slice (e.g. 'x[i]' => 'x[i:i+1]')." + ) none_axes = none_axes[1:] if len(none_axes) > 0: @@ -592,13 +604,6 @@ def _getitem_impl_(var, item): new_axis = axis - l none_axes[idx] = new_axis - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - from ..tensor import unsqueeze out = unsqueeze(out, axis=none_axes) diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py index a7221f0925e..e3620c4ffc0 100644 --- a/python/paddle/incubate/optimizer/functional/lbfgs.py +++ b/python/paddle/incubate/optimizer/functional/lbfgs.py @@ -125,9 +125,7 @@ def minimize_lbfgs( is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool') num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64') - history_size = paddle.full( - shape=[1], fill_value=history_size, dtype='int64' - ) + history_size = paddle.full(shape=[], fill_value=history_size, dtype='int64') head = paddle.full(shape=[1], fill_value=1, dtype='int64') tail = paddle.full(shape=[1], fill_value=0, dtype='int64') @@ -177,7 +175,7 @@ def minimize_lbfgs( q = paddle.assign(g1) # In a array circle, the index may out of range, so must use mod. i = paddle.full( - shape=[1], fill_value=(head - 1).mod(history_size), dtype='int64' + shape=[], fill_value=(head - 1).mod(history_size), dtype='int64' ) def cond(i, q): @@ -193,7 +191,7 @@ def minimize_lbfgs( r = paddle.matmul(H0, q) - i = paddle.full(shape=[1], fill_value=tail + 1, dtype='int64') + i = paddle.full(shape=[], fill_value=tail + 1, dtype='int64') def cond(i, r): return i != head diff --git a/python/paddle/jit/dy2static/variable_trans_func.py b/python/paddle/jit/dy2static/variable_trans_func.py index 20f0fb6317e..80c4487dc29 100644 --- a/python/paddle/jit/dy2static/variable_trans_func.py +++ b/python/paddle/jit/dy2static/variable_trans_func.py @@ -51,11 +51,11 @@ def to_static_variable(x): Translate a Python Tensor to PaddlePaddle static graph Tensor ''' if isinstance(x, bool): - return paddle.full(shape=[1], dtype='bool', fill_value=x) + return paddle.full(shape=[], dtype='bool', fill_value=x) if isinstance(x, float): - return paddle.full(shape=[1], dtype='float64', fill_value=x) + return paddle.full(shape=[], dtype='float64', fill_value=x) if isinstance(x, int): - return paddle.full(shape=[1], dtype='int64', fill_value=x) + return paddle.full(shape=[], dtype='int64', fill_value=x) if isinstance(x, UndefinedVar) or x is None: """ for early return case, we need a variable to represent None, current we use data_layer_not_check. 
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index ffd27a545b9..cc8ab648b88 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -271,7 +271,7 @@ def _rnn_static_graph( mask = paddle.reverse(mask, axis=[0]) if sequence_length else None with paddle.fluid.framework.device_guard("cpu"): - start_i = paddle.zeros([1], dtype="int64") + start_i = paddle.zeros([], dtype="int64") end = max_seq_len end = paddle.cast(end, "int64") diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index c2b4ec71263..2b42d795baf 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -3169,19 +3169,19 @@ def tile(x, repeat_times, name=None): ) if isinstance(repeat_times, Variable): assert ( - len(repeat_times.shape) == 1 - ), 'repeat_times must be an 1-D Tensor.' + repeat_times.numel() == 1 + ), 'repeat_times must be a Tensor with one element.' else: for elem in repeat_times: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in repeat_times must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + ), 'Elements in repeat_times must be Tensor with one element or integers.' check_variable_and_dtype( x, @@ -3425,18 +3425,18 @@ def expand(x, shape, name=None): return _C_ops.expand(x, shape) else: if isinstance(shape, Variable): - assert len(shape.shape) == 1, 'shape must be an 1-D Tensor.' + assert shape.numel() == 1, 'shape must be a Tensor with one element' else: for elem in shape: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in shape must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in shape must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in shape must be 1-D Tensors or integers.' + ), 'Elements in shape must be Tensor with one element or integers.' 
check_variable_and_dtype( x, diff --git a/test/dygraph_to_static/test_list.py b/test/dygraph_to_static/test_list.py index 44e02950bc5..091d261ed74 100644 --- a/test/dygraph_to_static/test_list.py +++ b/test/dygraph_to_static/test_list.py @@ -364,7 +364,7 @@ class TestListWithCondGradInferVarType(unittest.TestCase): x = paddle.to_tensor([2, 3, 4], dtype='float32') index = paddle.to_tensor([1]) res = net(x, index) - self.assertEqual(res[0], 48.0) + self.assertEqual(res, 48.0) if __name__ == '__main__': diff --git a/test/xpu/test_set_value_op_xpu.py b/test/xpu/test_set_value_op_xpu.py index e749eb8bc1b..a373d6a0ba5 100644 --- a/test/xpu/test_set_value_op_xpu.py +++ b/test/xpu/test_set_value_op_xpu.py @@ -1432,7 +1432,7 @@ class XPUTestSetValueOp(XPUOpTestWrapper): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/test/xpu/test_slice_op_xpu.py b/test/xpu/test_slice_op_xpu.py index f19c3d37e28..7cc0550740e 100644 --- a/test/xpu/test_slice_op_xpu.py +++ b/test/xpu/test_slice_op_xpu.py @@ -166,7 +166,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.starts = [0, 1, 2, 3] self.ends = [1, 2, 3, 4] self.axes = [0, 1, 2, 3] - self.decrease_axis = [0, 1, 2, 3] + self.decrease_axis = [0, 1, 2] self.infer_flags = [1, 1, 1] self.out = self.input[0, 1, 2, 3:4] @@ -188,7 +188,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.axes = [0, 1, 2, 3] self.decrease_axis = [0, 1, 2, 3] self.infer_flags = [1, 1, 1] - self.out = self.input[0, 1, 2, 3:4] + self.out = self.input[0, 1, 2, 3] support_types = get_xpu_op_support_types('slice') diff --git a/test/xpu/test_zero_dim_tensor_xpu.py b/test/xpu/test_zero_dim_tensor_xpu.py index 1a9f59040d5..9ecce0af830 100644 --- a/test/xpu/test_zero_dim_tensor_xpu.py +++ b/test/xpu/test_zero_dim_tensor_xpu.py @@ -344,6 +344,140 @@ class TestSundryAPI(unittest.TestCase): paddle.disable_static() self.x = paddle.rand([]) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. 
+ x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32') -- GitLab
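
For reference, below is a minimal dygraph sketch of the indexing behavior exercised by the unit tests added in this patch (test_zero_dim_tensor.py / test_zero_dim_tensor_xpu.py). It assumes a Paddle build that includes this change; the FLAGS_set_to_1d call is only needed if the transitional 1-D fallback is enabled by default in your build, and the flag's default value may differ between releases.

    import numpy as np
    import paddle

    # Disable the transitional 1-D fallback so __getitem__ follows the new 0-D rules.
    paddle.set_flags({'FLAGS_set_to_1d': False})

    x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))

    # Scalar indices on every axis now produce a 0-D Tensor (previously hacked to shape [1]).
    out = x[1, 2, 3, 4]
    assert out.shape == []
    np.testing.assert_allclose(out.numpy(), np.array(119))

    # A 0-D integer Tensor index behaves the same as a Python int index.
    i = paddle.full([], 1, dtype='int32')
    j = paddle.full([], 2, dtype='int32')
    np.testing.assert_allclose(x[i, j].numpy(), x[1, 2].numpy())

    # A 0-D Tensor index is also accepted in __setitem__.
    y = paddle.zeros((2, 3, 4, 5))
    y[i, j] = 0.5
    np.testing.assert_allclose(y[1, 2].numpy(), np.full((4, 5), 0.5, dtype='float32'))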