diff --git a/paddle/fluid/framework/attribute_checker.h b/paddle/fluid/framework/attribute_checker.h index 67eb69efdf3d099a426fcaeec7f7c36aeb59a11e..2e5e7bf8939aeaf66614e14f67ba8e37d8655b32 100644 --- a/paddle/fluid/framework/attribute_checker.h +++ b/paddle/fluid/framework/attribute_checker.h @@ -73,10 +73,10 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); @@ -105,20 +105,21 @@ class TypedAttrVarInfoChecker { platform::errors::InvalidArgument( "Required Attribute with Variable type shall not be nullptr.")); auto shape = var_desc->GetShape(); - PADDLE_ENFORCE_EQ(shape.size(), + PADDLE_ENFORCE_LE(shape.size(), 1U, platform::errors::InvalidArgument( - "Required shape rank of Attribute(%s) == 1, " + "Required shape rank of Attribute(%s) <= 1, " "but received rank == %s", var_desc->Name(), shape.size())); - PADDLE_ENFORCE_EQ(shape[0] == 1U || shape[0] == -1, - true, - platform::errors::InvalidArgument( - "Required shape[0] of Attribute(%s) == 1 or -1, " - "but received shape[0] == %s", - var_desc->Name(), - shape[0])); + PADDLE_ENFORCE_EQ( + shape.size() == 0U || shape[0] == 1U || shape[0] == -1, + true, + platform::errors::InvalidArgument( + "Required shape is (), or shape[0] of Attribute(%s) == 1 or -1, " + "but received shape[0] == %s", + var_desc->Name(), + shape[0])); } } }; diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 2455eed34fe366bb1ed722e5c8f622cd330ec829..0092bccba7028411170fae96ebec68d13e71ab49 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -135,17 +135,18 @@ static PyObject* tensor_method_numpy(TensorObject* self, } } if (set_to_1d) { - // 0D Tensor hack process to 1D numpy, will remove in future + // 0D Tensor hack process to 1D numpy, will remove in release 2.6 VLOG(0) << "Warning:: 0D Tensor cannot be used as 'Tensor.numpy()[0]' . In " "order to avoid this problem, " "0D Tensor will be changed to 1D numpy currently, but it's not " "correct and will be " - "removed in future. For Tensor contain only one element, Please " + "removed in release 2.6. For Tensor contain only one element, " + "Please " "modify " " 'Tensor.numpy()[0]' to 'float(Tensor)' as soon as " "possible, " - "otherwise 'Tensor.numpy()[0]' will raise error in future."; + "otherwise 'Tensor.numpy()[0]' will raise error in release 2.6."; py_rank = 1; py_dims[0] = 1; py_strides[0] = sizeof_dtype * numel; @@ -922,39 +923,50 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, } } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + // with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + // otherwise the output shape will be not correct. 
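      // For example, with x.shape = (2, 3, 4):
      //   out = x[0, 1, 1, None]
      // under the flag the sliced result is forced to shape (1), so one trailing
      // `None` has to be dropped to keep out.shape == (1) instead of (1, 1).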
if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) + << "Warning: In Tensor '__getitem__', if the number of scalar " + "elements " + "in the index is equal to the rank of the Tensor, the output " + "should " + "be 0-D. In order to be consistent with the behavior of previous " + "versions, it will be processed to 1-D. But it is not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). "; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - paddle::Tensor new_out; - { - eager_gil_scoped_release guard; - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto& axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + paddle::Tensor new_out; + { + eager_gil_scoped_release guard; + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto& axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - new_out = unsqueeze_ad_func(out, none_axes); + axis -= len; } - return ToPyObject(new_out); + new_out = unsqueeze_ad_func(out, none_axes); } + return ToPyObject(new_out); } // the index is a list diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 1be8371ad4f0352a24b95fbe7061a4e73fa4a788..1440918eb483fbed1c9123613e9e32fe14d66341 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -63,6 +63,7 @@ limitations under the License. */ #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/type_defs.h" +DECLARE_bool(set_to_1d); namespace paddle { namespace pybind { @@ -1067,46 +1068,63 @@ void BindImperative(py::module *m_ptr) { } tracer->TraceOp(op_type, ins, outs, std::move(attrs)); } - if (!none_axes.empty()) { - // Deal with cases when all axes are decreased. - // After slice, the shape of out is [1], which should have been - // [], but Paddle doesn't support scalar. - // In order to ensure the correctness of the final shape of out, - // one dimension of out needs to be decreased. - // For example: - // # x.shape: (2,3,4) - // out = x[0, 1, 1, None] # out.shape : (1) + + bool set_to_1d = FLAGS_set_to_1d; + + if (set_to_1d) { + // NOTE(zoooo0820): When all axes are decreased, the output + // will be 1-D with FLAGS_set_to_1d=True. In this case, one + // `None` should be pop out, otherwise the output shape will be + // not correct. if (static_cast(decrease_axis.size()) == tensor->dims().size()) { - none_axes.pop_back(); + VLOG(0) << "Warning: In Tensor '__getitem__', if the number " + "of scalar " + "elements " + "in the index is equal to the rank of the Tensor, " + "the output " + "should " + "be 0-D. In order to be consistent with the " + "behavior of previous " + "versions, it will be processed to 1-D. But it is " + "not correct and " + "will be " + "removed in release 2.6. " + "If 1-D is still wanted, please modify the index " + "element from " + "scalar to slice " + "(e.g. 'x[i]' => 'x[i:i+1]'). 
"; + if (!none_axes.empty()) { + none_axes.pop_back(); + } } - if (!none_axes.empty()) { - // Deal with cases that decrease_axes is not empty - // For example: - // # x.shape: (2,3,4) - // out = x[0, 0:2, None] # out.shape : (2, 1, 4) - for (auto &axis : none_axes) { - int len = 0; - for (int da : decrease_axis) { - if (da < axis) { - len++; - } + } + if (!none_axes.empty()) { + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto &axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; } - axis -= len; } - - imperative::NameVarBaseMap ins = {{"X", {out}}}; - framework::AttributeMap attrs = {{"axes", none_axes}}; - auto new_out = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - auto out_xshape = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - imperative::NameVarBaseMap outs = {{"Out", {new_out}}, - {"XShape", {out_xshape}}}; - tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); - - return new_out; + axis -= len; } + + imperative::NameVarBaseMap ins = {{"X", {out}}}; + framework::AttributeMap attrs = {{"axes", none_axes}}; + auto new_out = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto out_xshape = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + imperative::NameVarBaseMap outs = {{"Out", {new_out}}, + {"XShape", {out_xshape}}}; + tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); + + return new_out; } // the index is a list diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 14ae02246babb573f1ceb74e73d9bc9a802ed590..dbb1938090796859c031752c076523135e4579ab 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3918,9 +3918,6 @@ void StridedSliceRawInferMeta(const MetaTensor& x, new_out_shape.push_back(out_dims[i]); } } - if (new_out_shape.size() == 0) { - new_out_shape.push_back(1); - } out_dims = phi::make_ddim(new_out_shape); } VLOG(4) << "out_dims: " << out_dims; diff --git a/paddle/phi/kernels/funcs/slice_utils.h b/paddle/phi/kernels/funcs/slice_utils.h index a56a5e16f6503d79ad99ae11d8579f2bf67aef54..78fa4c4ba13a28c1e8097bc2d5129a0f6356ff9f 100644 --- a/paddle/phi/kernels/funcs/slice_utils.h +++ b/paddle/phi/kernels/funcs/slice_utils.h @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include - #include #include +#include "paddle/phi/core/flags.h" + +DECLARE_bool(set_to_1d); namespace phi { @@ -202,13 +205,11 @@ inline DDim GetDecreasedDims(const DDim slice_dims, new_shape.push_back(decreased_dims[i]); } } - - // NOTE(liym27): Paddle does not support that the rank of Tensor is 0, and - // uses [1] instead. - if (new_shape.size() == 0) { + if (FLAGS_set_to_1d && new_shape.size() == 0) { + // NOTE(zoooo0820): Hack procssing to 1-D, when axes decrease to 0-D in + // slice. This will remove in release 2.6. 
new_shape.push_back(1); } - decreased_dims = phi::make_ddim(new_shape); } return decreased_dims; diff --git a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc index d7e1ed8114e008d297fb1e295fab3b29a5c8dc82..d80a2a97da8cfe9ff785f1dc51939aebc8fc97fd 100644 --- a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc @@ -266,6 +266,11 @@ void SetValueGradImpl(const Context& dev_ctx, {fake_value_grad_dims.Get(), fake_value_grad_dims.size()}, static_cast(0)); auto value_grad_dims_vec = phi::vectorize(value_grad_dims); + // for value is a 0-D Tensor + if (value_grad_dims.size() == 0) { + value_grad_dims_vec = + phi::vectorize(phi::make_ddim(std::vector({1}))); + } for (auto offset : offsets) { for (int i = 0; i < out_dims_size; i++) { slice_end[i] = offset[i] + fake_value_grad_dims[i]; diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index 17e68002fa42f4f5858af215f07d86b6641cd159..0110f54d481a0ffcb4d8e4ecf36aea7692ffe453 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle + from ..utils import compute_compatible_dim_mapping, is_dim_shard from .common import ( DistributedOperatorImpl, @@ -70,9 +72,14 @@ class DistributedSliceImpl(DistributedOperatorImpl): if i not in decrease_axis: ref_indices.append(i) if ref_indices == []: - assert len(out_dims_mapping) == 1 - if is_dim_shard(out_dims_mapping[0]): - return False + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + assert len(out_dims_mapping) == 1 + if is_dim_shard(out_dims_mapping[0]): + return False + else: + assert len(out_dims_mapping) == 0 else: for i in range(len(out_dims_mapping)): ref_index = ref_indices[i] @@ -142,9 +149,12 @@ class DistributedSliceImpl(DistributedOperatorImpl): ref_indices.append(i) if ref_dims_mapping == []: - ref_dims_mapping = [-1] + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. 
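            # Hence a single replicated mapping ([-1]) is only needed in the legacy
            # 1-D case; with a true 0-D output there is no dimension left to map.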
+ if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + ref_dims_mapping = [-1] + assert ref_dims_mapping[0] == out_dims_mapping[0] assert len(ref_dims_mapping) == len(out_dims_mapping) - assert ref_dims_mapping[0] == out_dims_mapping[0] changed = False else: assert len(ref_dims_mapping) == len(out_dims_mapping) diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 1ce18f120c19e4d07973ff667db992d1eebd548b..438c65ae2f0448655ef21d5c0a420069de22e768 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -1371,7 +1371,7 @@ def fftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = shape[axes] // 2 else: - shifts = paddle.concat([shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) @@ -1416,7 +1416,7 @@ def ifftshift(x, axes=None, name=None): elif isinstance(axes, int): shifts = -shape[axes] // 2 else: - shifts = paddle.concat([-shape[ax] // 2 for ax in axes]) + shifts = paddle.concat([-shape[ax : ax + 1] // 2 for ax in axes]) return paddle.roll(x, shifts, axes, name=name) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py index effcfece0f5eeeee8dc75b7809d522930600bc45..58059a295539dfb04a30428abe7016ea1ee0acc2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py @@ -43,7 +43,7 @@ class TestImperativeNumpyBridge(unittest.TestCase): np.testing.assert_array_equal(var2.numpy(), data_np) data_np[0][0] = -1 self.assertEqual(data_np[0][0], -1) - self.assertNotEqual(var2[0][0].numpy()[0], -1) + self.assertNotEqual(var2[0][0].numpy(), -1) self.assertFalse(np.array_equal(var2.numpy(), data_np)) diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py index 66389a870e46f177acc8e024d66b24a3acbf1203..0bf3d8e9480976ca50ab25683abba2ef98d7b4ad 100644 --- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py +++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py @@ -140,16 +140,16 @@ class TestKthvalueOpWithNaN(unittest.TestCase): nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) def test_nan_in_gpu_kernel(): paddle.set_device('gpu') nan_position = 100 self.x[0, nan_position, 2] = float('nan') v, inds = self.x.kthvalue(k=200, axis=1) - self.assertTrue(np.isnan(v[0, 2].numpy()[0])) - self.assertEqual(inds[0, 2].numpy()[0], nan_position) + self.assertTrue(np.isnan(v[0, 2].numpy())) + self.assertEqual(inds[0, 2].numpy(), nan_position) test_nan_in_cpu_kernel() if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 9c5a71df0183581c63329423ea439eaf76d7d9d8..66d4b8f05b75df0f0d50070cd01d789d3228c509 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -1590,7 +1590,7 @@ class TestSetValueInplace(unittest.TestCase): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) 
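        # The index above is an explicit 0-D integer Tensor; under the new behavior a
        # 0-D integer Tensor is treated as a scalar (basic) index, so the assignment
        # behaves like `b[0] = 1.0` and `b` is still modified in place.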
np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 0314a37170d0e1c7c11e64e8b726a34e66e4a3b2..f43bd4b140d7dea10739376eda36c02da0d3ee7f 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -541,8 +541,8 @@ class TestSliceAPI(unittest.TestCase): def test_1(self): with paddle_static_guard(): input = np.random.random([3, 4, 5, 6]).astype("float64") - minus_1 = paddle.tensor.fill_constant([1], "int32", -1) - minus_3 = paddle.tensor.fill_constant([1], "int64", -3) + minus_1 = paddle.tensor.fill_constant([], "int32", -1) + minus_3 = paddle.tensor.fill_constant([], "int64", -3) starts = paddle.static.data( name='starts', shape=[1, 3], dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 24920eb375ce1154355d65e119b8b97ade90e0ca..c9607f89197a59310d6130cb8aa592802692b9ce 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -604,8 +604,7 @@ class TestVarBase(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), tuple(nw.shape)) @@ -705,10 +704,10 @@ class TestVarBase(unittest.TestCase): var = paddle.to_tensor(tensor_array) - one = paddle.ones(shape=[1], dtype="int32") - two = paddle.full(shape=[1], fill_value=2, dtype="int32") - negative_one = paddle.full(shape=[1], fill_value=-1, dtype="int32") - four = paddle.full(shape=[1], fill_value=4, dtype="int32") + one = paddle.ones(shape=[], dtype="int32") + two = paddle.full(shape=[], fill_value=2, dtype="int32") + negative_one = paddle.full(shape=[], fill_value=-1, dtype="int32") + four = paddle.full(shape=[], fill_value=4, dtype="int32") var = fluid.dygraph.to_variable(tensor_array) var1 = var[0, one, one] diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index b709510371edf52b1649eedcac3104d83a5f1bbc..6d5bd96f9aca2641fe83474122ceb641ab99aa1d 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -132,8 +132,7 @@ class TestVariable(unittest.TestCase): nw = w[1, 1, 1] - self.assertEqual(len(nw.shape), 1) - self.assertEqual(nw.shape[0], 1) + self.assertEqual(len(nw.shape), 0) nw = w[:, :, :-1] self.assertEqual((784, 100, 99), nw.shape) diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index ea6d2d4945395887538883dd559d167ceefc3e3d..8ae9fa8c5c2bf93f0ffc48b871989356a672ee77 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -192,9 +192,9 @@ class TestOutputsMustExistsInputs(unittest.TestCase): with fluid.program_guard(main_program, startup_program): def func(x): - s = paddle.zeros([1]) - i = paddle.ones([1]) - max_len = paddle.shape(x)[0] + s = paddle.zeros([]) + i = paddle.ones([]) + max_len = paddle.shape(x) def cond(i, s, x): return i < max_len diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index f3bea4cf2467b6666fee195f11415ee6d2bc7260..8d824a68298df0e0a6649c1cd7c12c10b7291729 
100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -666,6 +666,140 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(zero_dim_var.shape, []) self.assertEqual(zero_dim_var.item(), 0.5) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
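        # A short illustration of the distinction (assuming dygraph mode): a 0-D integer
        # Tensor used alone acts like a Python int (basic indexing), while mixing it with
        # slices, e.g. `out[1, indice, :] = 0.5`, currently goes through the combined
        # (advanced) indexing path, which has no backward support yet.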
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32') @@ -2456,6 +2590,118 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[0].shape, ()) self.assertEqual(res[0], 0.5) + @prog_scope() + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) + + self.assertEqual(res[0].shape, ()) + np.testing.assert_allclose(res[0], np.array(119)) + self.assertEqual(res[2].shape, ()) + np.testing.assert_allclose(res[2], 1.0) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. 
+ x2 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x2[1, 2] + out2 = x2[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + res = self.exe.run(prog, fetch_list=[out1, out2]) + np.testing.assert_allclose(res[0], res[1]) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. + x3 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out3 = x3[1, 2, None, 3, 4] + out4 = x3[1, None, 2, None, 3, 4] + res = self.exe.run(prog, fetch_list=[out3, out4]) + self.assertEqual(res[0].shape, (1,)) + np.testing.assert_allclose(res[0], np.array([119])) + self.assertEqual(res[1].shape, (1, 1)) + np.testing.assert_allclose(res[1], np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x4 = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out5 = x4[indice] + out6 = x4[indice, indice] + res = self.exe.run(prog, fetch_list=[out5, out6]) + + self.assertEqual(res[0].shape, (1, 3, 4)) + np.testing.assert_allclose(res[0], np.ones((1, 3, 4))) + self.assertEqual(res[1].shape, (1, 4)) + np.testing.assert_allclose(res[1], np.ones((1, 4))) + + @prog_scope() + def test_setitem(self): + # NOTE(zoooo0820): __setitem__ has gradient problem in static graph. + # To solve this, we may not support __setitem__ in static graph. + # These unit tests will delete soon. + + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 2, 3, 4], np.array(10)) + self.assertEqual(res[1].shape, (2, 3, 4, 5)) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. 
+ x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + paddle.static.append_backward(out.sum()) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, v.grad_name]) + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(res[0][1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(res[1], x_grad_expected) + @prog_scope() def test_expand(self): x = paddle.full([], 1, 'float32') diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 0d866860b314a3e65dcd759c80768d1a68856fc7..451e8c3bf9875800d722a8b1220c6aae9b3cf7a5 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -17,6 +17,7 @@ import numpy as np from . import unique_name from . import core import paddle +import warnings MAX_INTEGER = 2**31 - 1 @@ -185,7 +186,8 @@ class SliceInfo: for i in range(len(gather_tensor_shape)): if not ( - value_dims_bd[i] == gather_tensor_shape[i] + len(value_dims_bd) == 0 + or value_dims_bd[i] == gather_tensor_shape[i] or value_dims_bd[i] == 1 ): raise ValueError( @@ -282,7 +284,16 @@ def is_integer_or_scalar_tensor(ele): if isinstance(ele, int): return True elif isinstance(ele, Variable): - if len(ele.shape) == 1 and ele.shape[0] == 1: + # NOTE(zoooo0820): For compatibility, if FLAGS_set_to_1d is set to True, + # 1-D tensor is still treated as a scalar, which means basic indexing. + # This will be removed in future. + if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']: + if len(ele.shape) == 1 and ele.shape[0] == 1: + warnings.warn( + "1-D Tensor will be treat as advanced indexing in future version. Currently, 1-D Tensor means a scalar, not vector, and please modify it to 0-D Tensor. If advanced indexing is needed, please use `export FLAGS_set_to_1d=False` to set the flag." + ) + return True + if len(ele.shape) == 0: return True return False @@ -573,13 +584,14 @@ def _getitem_impl_(var, item): out = reverse(out, axis=reverse_axes) - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - if len(decrease_axes) == len(var.shape): + # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D + # with FLAGS_set_to_1d=True. In this case, one `None` should be pop out, + # otherwise the output shape will be not correct. 
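    # For example, with x.shape == (2, 3, 4) and out = x[0, 1, 1, None]: under the flag
    # the slice result keeps shape (1,), so one `None` is dropped to make out.shape (1)
    # rather than (1, 1).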
+ set_to_1d = paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d'] + if set_to_1d and len(decrease_axes) == len(var.shape): + warnings.warn( + "Warning: In Tensor '__getitem__', if the number of scalar elements in the index is equal to the rank of the Tensor, the output should be 0-D. In order to be consistent with the behavior of previous versions, it will be processed to 1-D. But it is not correct and will be removed in release 2.6. If 1-D is still wanted, please modify the index element from scalar to slice (e.g. 'x[i]' => 'x[i:i+1]')." + ) none_axes = none_axes[1:] if len(none_axes) > 0: @@ -592,13 +604,6 @@ def _getitem_impl_(var, item): new_axis = axis - l none_axes[idx] = new_axis - # Deal with cases when all axes are decreased. - # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. - # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. - # For example: - # # x.shape: (2,3,4) - # out = x[0, 1, 1, None] # out.shape : (1) - from ..tensor import unsqueeze out = unsqueeze(out, axis=none_axes) diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py index a7221f0925e76d8a3dbf16eca1af481675fa7d78..e3620c4ffc0ed5191acfeda6c1d949b1bb6d52af 100644 --- a/python/paddle/incubate/optimizer/functional/lbfgs.py +++ b/python/paddle/incubate/optimizer/functional/lbfgs.py @@ -125,9 +125,7 @@ def minimize_lbfgs( is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool') num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64') - history_size = paddle.full( - shape=[1], fill_value=history_size, dtype='int64' - ) + history_size = paddle.full(shape=[], fill_value=history_size, dtype='int64') head = paddle.full(shape=[1], fill_value=1, dtype='int64') tail = paddle.full(shape=[1], fill_value=0, dtype='int64') @@ -177,7 +175,7 @@ def minimize_lbfgs( q = paddle.assign(g1) # In a array circle, the index may out of range, so must use mod. i = paddle.full( - shape=[1], fill_value=(head - 1).mod(history_size), dtype='int64' + shape=[], fill_value=(head - 1).mod(history_size), dtype='int64' ) def cond(i, q): @@ -193,7 +191,7 @@ def minimize_lbfgs( r = paddle.matmul(H0, q) - i = paddle.full(shape=[1], fill_value=tail + 1, dtype='int64') + i = paddle.full(shape=[], fill_value=tail + 1, dtype='int64') def cond(i, r): return i != head diff --git a/python/paddle/jit/dy2static/variable_trans_func.py b/python/paddle/jit/dy2static/variable_trans_func.py index 20f0fb6317e3b6ac296c31d48968a9768a7591bb..80c4487dc29c67ba91d408e4f4de26c5c68877dc 100644 --- a/python/paddle/jit/dy2static/variable_trans_func.py +++ b/python/paddle/jit/dy2static/variable_trans_func.py @@ -51,11 +51,11 @@ def to_static_variable(x): Translate a Python Tensor to PaddlePaddle static graph Tensor ''' if isinstance(x, bool): - return paddle.full(shape=[1], dtype='bool', fill_value=x) + return paddle.full(shape=[], dtype='bool', fill_value=x) if isinstance(x, float): - return paddle.full(shape=[1], dtype='float64', fill_value=x) + return paddle.full(shape=[], dtype='float64', fill_value=x) if isinstance(x, int): - return paddle.full(shape=[1], dtype='int64', fill_value=x) + return paddle.full(shape=[], dtype='int64', fill_value=x) if isinstance(x, UndefinedVar) or x is None: """ for early return case, we need a variable to represent None, current we use data_layer_not_check. 
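The pieces above change what an all-scalar index returns and how Python scalars are lifted in dynamic-to-static. A minimal sketch of the resulting user-visible behavior (illustrative only; it assumes the new 0-D semantics with the FLAGS_set_to_1d compatibility flag turned off):

import paddle

x = paddle.arange(24).reshape((2, 3, 4))

out = x[0, 1, 1]          # every axis indexed by a scalar -> 0-D result
assert out.shape == []    # legacy behavior (flag on) would give [1]

out_1d = x[0, 1, 1:2]     # keep a 1-D result explicitly by using a slice
assert out_1d.shape == [1]

i = paddle.full([], 1, dtype='int32')   # a 0-D integer Tensor acts as a scalar index
assert (x[i, i] == x[1, 1]).all().item()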
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index ffd27a545b9932e75c79dbcaba886623d77a1781..cc8ab648b889569dfd71e39e89c73a09b9f4ff96 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -271,7 +271,7 @@ def _rnn_static_graph( mask = paddle.reverse(mask, axis=[0]) if sequence_length else None with paddle.fluid.framework.device_guard("cpu"): - start_i = paddle.zeros([1], dtype="int64") + start_i = paddle.zeros([], dtype="int64") end = max_seq_len end = paddle.cast(end, "int64") diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index c2b4ec7126388832e7a02006373510d94ce84451..2b42d795bafb8cd98537051544e4e007d031d405 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -3169,19 +3169,19 @@ def tile(x, repeat_times, name=None): ) if isinstance(repeat_times, Variable): assert ( - len(repeat_times.shape) == 1 - ), 'repeat_times must be an 1-D Tensor.' + repeat_times.numel() == 1 + ), 'repeat_times must be a Tensor with one element.' else: for elem in repeat_times: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in repeat_times must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in repeat_times must be 1-D Tensors or integers.' + ), 'Elements in repeat_times must be Tensor with one element or integers.' check_variable_and_dtype( x, @@ -3425,18 +3425,18 @@ def expand(x, shape, name=None): return _C_ops.expand(x, shape) else: if isinstance(shape, Variable): - assert len(shape.shape) == 1, 'shape must be an 1-D Tensor.' + assert shape.numel() == 1, 'shape must be a Tensor with one element' else: for elem in shape: if isinstance(elem, Variable): assert ( - len(elem.shape) == 1 - ), 'Elements in shape must be 1-D Tensors or integers.' + elem.numel() == 1 + ), 'Elements in shape must be Tensor with one element or integers.' else: type_tuple = (int, np.int32, np.int64) assert isinstance( elem, type_tuple - ), 'Elements in shape must be 1-D Tensors or integers.' + ), 'Elements in shape must be Tensor with one element or integers.' 
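        # With the relaxed check above, each Tensor element of `shape` only needs a single
        # element (numel() == 1), so both 0-D Tensors (e.g. paddle.full([], 2, 'int32'))
        # and shape-[1] Tensors are accepted alongside plain Python integers.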
check_variable_and_dtype( x, diff --git a/test/dygraph_to_static/test_list.py b/test/dygraph_to_static/test_list.py index 44e02950bc54819d90835aae85557a8267dfebed..091d261ed74580d38ababd46e41693c201e2873e 100644 --- a/test/dygraph_to_static/test_list.py +++ b/test/dygraph_to_static/test_list.py @@ -364,7 +364,7 @@ class TestListWithCondGradInferVarType(unittest.TestCase): x = paddle.to_tensor([2, 3, 4], dtype='float32') index = paddle.to_tensor([1]) res = net(x, index) - self.assertEqual(res[0], 48.0) + self.assertEqual(res, 48.0) if __name__ == '__main__': diff --git a/test/xpu/test_set_value_op_xpu.py b/test/xpu/test_set_value_op_xpu.py index e749eb8bc1b116bc18d938aaa2a63781978b7f2b..a373d6a0ba5f8777ae3dd61da9d09bb0aa6c5eca 100644 --- a/test/xpu/test_set_value_op_xpu.py +++ b/test/xpu/test_set_value_op_xpu.py @@ -1432,7 +1432,7 @@ class XPUTestSetValueOp(XPUOpTestWrapper): a.stop_gradient = False b = a[:] c = b - b[paddle.to_tensor(0)] = 1.0 + b[paddle.zeros([], dtype='int32')] = 1.0 self.assertTrue(id(b) == id(c)) np.testing.assert_array_equal(b.numpy(), c.numpy()) diff --git a/test/xpu/test_slice_op_xpu.py b/test/xpu/test_slice_op_xpu.py index f19c3d37e283edb51235cdcde08013fb2c41ffe2..7cc0550740e1b14ca12df2b865d78076eb76affb 100644 --- a/test/xpu/test_slice_op_xpu.py +++ b/test/xpu/test_slice_op_xpu.py @@ -166,7 +166,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.starts = [0, 1, 2, 3] self.ends = [1, 2, 3, 4] self.axes = [0, 1, 2, 3] - self.decrease_axis = [0, 1, 2, 3] + self.decrease_axis = [0, 1, 2] self.infer_flags = [1, 1, 1] self.out = self.input[0, 1, 2, 3:4] @@ -188,7 +188,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.axes = [0, 1, 2, 3] self.decrease_axis = [0, 1, 2, 3] self.infer_flags = [1, 1, 1] - self.out = self.input[0, 1, 2, 3:4] + self.out = self.input[0, 1, 2, 3] support_types = get_xpu_op_support_types('slice') diff --git a/test/xpu/test_zero_dim_tensor_xpu.py b/test/xpu/test_zero_dim_tensor_xpu.py index 1a9f59040d53486c8963f7cc7cb3ef793b3436e7..9ecce0af8305d3e2112b17d4c94c44103603214f 100644 --- a/test/xpu/test_zero_dim_tensor_xpu.py +++ b/test/xpu/test_zero_dim_tensor_xpu.py @@ -344,6 +344,140 @@ class TestSundryAPI(unittest.TestCase): paddle.disable_static() self.x = paddle.rand([]) + def test_getitem(self): + # case1: When all axis have a scalar indice, output should be a 0-d Tensor; + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x[1, 2, 3, 4] + out.retain_grads() + out.backward() + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, np.array(119)) + self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.zeros((2, 3, 4, 5)) + x_grad_expected[1, 2, 3, 4] = 1.0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: When one axis has a 0-d Tensor indice, the output should be same as int indice. + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2] + out2 = x[ + paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32') + ] + np.testing.assert_allclose(out1, out2) + + # case3: When all axis have a scalar indice (i.e. case1) and has None indice, + # ndim of output should be same with numbers of None. 
+ x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + out1 = x[1, 2, None, 3, 4] + self.assertEqual(out1.shape, [1]) + np.testing.assert_allclose(out1, np.array([119])) + out2 = x[1, None, 2, None, 3, 4] + self.assertEqual(out2.shape, [1, 1]) + np.testing.assert_allclose(out2, np.array([[119]])) + + # case4: 1-D Tensor will be treated as vector, no axis decrease will happen. + x = paddle.ones((2, 3, 4)) + indice = paddle.ones([1], dtype='int32') + out1 = x[indice] + self.assertEqual(out1.shape, [1, 3, 4]) + np.testing.assert_allclose(out1, np.ones((1, 3, 4))) + out2 = x[indice, indice] + self.assertEqual(out2.shape, [1, 4]) + np.testing.assert_allclose(out2, np.ones((1, 4))) + + def test_setitem(self): + # case1: all axis have a scalar indice + x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)) + x.stop_gradient = False + out = x * 2 + out[1, 2, 3, 4] = 10 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10)) + self.assertEqual(x.grad.shape, [2, 3, 4, 5]) + x_grad_expected = np.ones((2, 3, 4, 5)) * 2 + x_grad_expected[1, 2, 3, 4] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case2: 0-D Tensor indice in some axis + # NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be + # treated as combined indexing, which is not support backward. + # There should have more test cases such as out[1, indice, :] = 0.5 when this + # problem is fixed. + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice, indice] = 0.5 + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1, 1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + + # case3:0-D Tensor indice in some axis, value is a Tensor + # and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones((4, 5), dtype='float32') * 5 + v.stop_gradient = False + indice = paddle.full([], 1, dtype='int32') + out = x * 1 + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[1] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones((4, 5)) * 3 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case4: value is a 0-D tensor and there is broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 5 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) * 3 * 4 * 5 + np.testing.assert_allclose(v.grad, value_grad_expected) + + # case5: indice / value is 0-D Tensor, and there is no broadcast + x = paddle.randn((2, 3, 4, 5)) + x.stop_gradient = False + v = paddle.ones([], dtype='float32') * 2 + v.stop_gradient = False + out = x * 1 + indice = paddle.full([], 0, dtype='int32') + out[indice, indice, indice, indice] = v + out.backward() + + self.assertEqual(out.shape, x.shape) + self.assertEqual(v.grad.shape, []) + 
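        # With all-scalar indices and a 0-D value there is no broadcast, so the value
        # gradient below is simply a scalar 1 (np.ones(())), unlike case4 where it is
        # scaled by the broadcast size 3 * 4 * 5.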
np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2) + x_grad_expected = np.ones((2, 3, 4, 5)) + x_grad_expected[0, 0, 0, 0] = 0 + np.testing.assert_allclose(x.grad, x_grad_expected) + value_grad_expected = np.ones(()) + np.testing.assert_allclose(v.grad, value_grad_expected) + def test_expand(self): # case1 x = paddle.full([], 1, 'float32')