Unverified commit 767e7b3f, authored by JYChen, committed by GitHub

[Cherry-pick] zero-dim: support 0-D for getitem/setitem (#53441)

* support 0-D output and 0-D as index in __getitem__

* fix tests

* fix inference and UT

* add unittest for setitem

* fix xpu test

* fix xpu 0-d

* fix right value is 0d and index is List/Tensor

* Hack __getitem__ from 0-d to 1-d with FLAGS_set_to_1d (behavior sketched below)

* change PHI_DECLARE_xxx to DECLARE_xxx since the change not merged to 2.5

* hack 1-D tensor to Scalar

* throw warning at __getitem__, not slice_utils
Parent 95a7bcf9
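The user-visible effect of this cherry-pick, pieced together from the unit tests added below (a rough sketch, not part of the commit; the shapes in the comments follow the new tests):

import paddle

x = paddle.arange(120, dtype='float32').reshape((2, 3, 4, 5))

# Indexing every axis with a scalar now yields a 0-D Tensor.
out = x[1, 2, 3, 4]
print(out.shape)        # [] (previously [1])

# A 0-D Tensor index behaves exactly like a Python int index.
i = paddle.full([], 1, dtype='int32')
print(x[i, i].shape)    # [4, 5], same as x[1, 1]

# __setitem__ accepts 0-D indices and 0-D values as well.
out = x * 1
out[i, i] = 0.5

With FLAGS_set_to_1d enabled, the all-scalar case above is still processed to a 1-D result for backward compatibility and a warning is emitted; the diff marks that hack for removal in release 2.6.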
@@ -73,10 +73,10 @@ class TypedAttrVarInfoChecker {
          platform::errors::InvalidArgument(
              "Required Attribute with Variable type shall not be nullptr."));
      auto shape = var_desc->GetShape();
-     PADDLE_ENFORCE_EQ(shape.size(),
+     PADDLE_ENFORCE_LE(shape.size(),
                        1U,
                        platform::errors::InvalidArgument(
-                           "Required shape rank of Attribute(%s) == 1, "
+                           "Required shape rank of Attribute(%s) <= 1, "
                            "but received rank == %s",
                            var_desc->Name(),
                            shape.size()));
@@ -105,20 +105,21 @@
          platform::errors::InvalidArgument(
              "Required Attribute with Variable type shall not be nullptr."));
      auto shape = var_desc->GetShape();
-     PADDLE_ENFORCE_EQ(shape.size(),
+     PADDLE_ENFORCE_LE(shape.size(),
                        1U,
                        platform::errors::InvalidArgument(
-                           "Required shape rank of Attribute(%s) == 1, "
+                           "Required shape rank of Attribute(%s) <= 1, "
                            "but received rank == %s",
                            var_desc->Name(),
                            shape.size()));
-     PADDLE_ENFORCE_EQ(shape[0] == 1U || shape[0] == -1,
-                       true,
-                       platform::errors::InvalidArgument(
-                           "Required shape[0] of Attribute(%s) == 1 or -1, "
-                           "but received shape[0] == %s",
-                           var_desc->Name(),
-                           shape[0]));
+     PADDLE_ENFORCE_EQ(
+         shape.size() == 0U || shape[0] == 1U || shape[0] == -1,
+         true,
+         platform::errors::InvalidArgument(
+             "Required shape is (), or shape[0] of Attribute(%s) == 1 or -1, "
+             "but received shape[0] == %s",
+             var_desc->Name(),
+             shape[0]));
    }
  }
};
......
@@ -135,17 +135,18 @@ static PyObject* tensor_method_numpy(TensorObject* self,
      }
    }
    if (set_to_1d) {
-     // 0D Tensor hack process to 1D numpy, will remove in future
+     // 0D Tensor hack process to 1D numpy, will remove in release 2.6
      VLOG(0)
          << "Warning:: 0D Tensor cannot be used as 'Tensor.numpy()[0]' . In "
             "order to avoid this problem, "
             "0D Tensor will be changed to 1D numpy currently, but it's not "
             "correct and will be "
-            "removed in future. For Tensor contain only one element, Please "
+            "removed in release 2.6. For Tensor contain only one element, "
+            "Please "
             "modify "
             " 'Tensor.numpy()[0]' to 'float(Tensor)' as soon as "
             "possible, "
-            "otherwise 'Tensor.numpy()[0]' will raise error in future.";
+            "otherwise 'Tensor.numpy()[0]' will raise error in release 2.6.";
      py_rank = 1;
      py_dims[0] = 1;
      py_strides[0] = sizeof_dtype * numel;

@@ -922,39 +923,50 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
    }
  }
- if (!none_axes.empty()) {
-   // Deal with cases when all axes are decreased.
-   // After slice, the shape of out is [1], which should have been
-   // [], but Paddle doesn't support scalar.
-   // In order to ensure the correctness of the final shape of out,
-   // one dimension of out needs to be decreased.
-   // For example:
-   //    # x.shape: (2,3,4)
-   //    out = x[0, 1, 1, None] # out.shape : (1)
-   if (static_cast<int>(decrease_axis.size()) == tensor->dims().size()) {
-     none_axes.pop_back();
-   }
-   if (!none_axes.empty()) {
-     paddle::Tensor new_out;
-     {
-       eager_gil_scoped_release guard;
-       // Deal with cases that decrease_axes is not empty
-       // For example:
-       //    # x.shape: (2,3,4)
-       //    out = x[0, 0:2, None] # out.shape : (2, 1, 4)
-       for (auto& axis : none_axes) {
-         int len = 0;
-         for (int da : decrease_axis) {
-           if (da < axis) {
-             len++;
-           }
-         }
-         axis -= len;
-       }
-       new_out = unsqueeze_ad_func(out, none_axes);
-     }
-     return ToPyObject(new_out);
-   }
- }
+ bool set_to_1d = FLAGS_set_to_1d;
+ if (set_to_1d) {
+   // NOTE(zoooo0820): When all axes are decreased, the output will be 1-D
+   // with FLAGS_set_to_1d=True. In this case, one `None` should be pop out,
+   // otherwise the output shape will be not correct.
+   if (static_cast<int>(decrease_axis.size()) == tensor->dims().size()) {
+     VLOG(0)
+         << "Warning: In Tensor '__getitem__', if the number of scalar "
+            "elements "
+            "in the index is equal to the rank of the Tensor, the output "
+            "should "
+            "be 0-D. In order to be consistent with the behavior of previous "
+            "versions, it will be processed to 1-D. But it is not correct and "
+            "will be "
+            "removed in release 2.6. "
+            "If 1-D is still wanted, please modify the index element from "
+            "scalar to slice "
+            "(e.g. 'x[i]' => 'x[i:i+1]'). ";
+     if (!none_axes.empty()) {
+       none_axes.pop_back();
+     }
+   }
+ }
+ if (!none_axes.empty()) {
+   paddle::Tensor new_out;
+   {
+     eager_gil_scoped_release guard;
+     // Deal with cases that decrease_axes is not empty
+     // For example:
+     //    # x.shape: (2,3,4)
+     //    out = x[0, 0:2, None] # out.shape : (2, 1, 4)
+     for (auto& axis : none_axes) {
+       int len = 0;
+       for (int da : decrease_axis) {
+         if (da < axis) {
+           len++;
+         }
+       }
+       axis -= len;
+     }
+     new_out = unsqueeze_ad_func(out, none_axes);
+   }
+   return ToPyObject(new_out);
+ }
  // the index is a list
......
@@ -63,6 +63,7 @@ limitations under the License. */
#include "paddle/phi/core/compat/arg_map_context.h"
#include "paddle/phi/core/type_defs.h"
+DECLARE_bool(set_to_1d);
namespace paddle {
namespace pybind {

@@ -1067,46 +1068,63 @@ void BindImperative(py::module *m_ptr) {
            }
            tracer->TraceOp(op_type, ins, outs, std::move(attrs));
          }
-         if (!none_axes.empty()) {
-           // Deal with cases when all axes are decreased.
-           // After slice, the shape of out is [1], which should have been
-           // [], but Paddle doesn't support scalar.
-           // In order to ensure the correctness of the final shape of out,
-           // one dimension of out needs to be decreased.
-           // For example:
-           //    # x.shape: (2,3,4)
-           //    out = x[0, 1, 1, None] # out.shape : (1)
-           if (static_cast<int>(decrease_axis.size()) ==
-               tensor->dims().size()) {
-             none_axes.pop_back();
-           }
-           if (!none_axes.empty()) {
-             // Deal with cases that decrease_axes is not empty
-             // For example:
-             //    # x.shape: (2,3,4)
-             //    out = x[0, 0:2, None] # out.shape : (2, 1, 4)
-             for (auto &axis : none_axes) {
-               int len = 0;
-               for (int da : decrease_axis) {
-                 if (da < axis) {
-                   len++;
-                 }
-               }
-               axis -= len;
-             }
-             imperative::NameVarBaseMap ins = {{"X", {out}}};
-             framework::AttributeMap attrs = {{"axes", none_axes}};
-             auto new_out = std::shared_ptr<imperative::VarBase>(
-                 new imperative::VarBase(tracer->GenerateUniqueName()));
-             auto out_xshape = std::shared_ptr<imperative::VarBase>(
-                 new imperative::VarBase(tracer->GenerateUniqueName()));
-             imperative::NameVarBaseMap outs = {{"Out", {new_out}},
-                                                {"XShape", {out_xshape}}};
-             tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs));
-             return new_out;
-           }
-         }
+         bool set_to_1d = FLAGS_set_to_1d;
+         if (set_to_1d) {
+           // NOTE(zoooo0820): When all axes are decreased, the output
+           // will be 1-D with FLAGS_set_to_1d=True. In this case, one
+           // `None` should be pop out, otherwise the output shape will be
+           // not correct.
+           if (static_cast<int>(decrease_axis.size()) ==
+               tensor->dims().size()) {
+             VLOG(0) << "Warning: In Tensor '__getitem__', if the number "
+                        "of scalar "
+                        "elements "
+                        "in the index is equal to the rank of the Tensor, "
+                        "the output "
+                        "should "
+                        "be 0-D. In order to be consistent with the "
+                        "behavior of previous "
+                        "versions, it will be processed to 1-D. But it is "
+                        "not correct and "
+                        "will be "
+                        "removed in release 2.6. "
+                        "If 1-D is still wanted, please modify the index "
+                        "element from "
+                        "scalar to slice "
+                        "(e.g. 'x[i]' => 'x[i:i+1]'). ";
+             if (!none_axes.empty()) {
+               none_axes.pop_back();
+             }
+           }
+         }
+         if (!none_axes.empty()) {
+           // Deal with cases that decrease_axes is not empty
+           // For example:
+           //    # x.shape: (2,3,4)
+           //    out = x[0, 0:2, None] # out.shape : (2, 1, 4)
+           for (auto &axis : none_axes) {
+             int len = 0;
+             for (int da : decrease_axis) {
+               if (da < axis) {
+                 len++;
+               }
+             }
+             axis -= len;
+           }
+           imperative::NameVarBaseMap ins = {{"X", {out}}};
+           framework::AttributeMap attrs = {{"axes", none_axes}};
+           auto new_out = std::shared_ptr<imperative::VarBase>(
+               new imperative::VarBase(tracer->GenerateUniqueName()));
+           auto out_xshape = std::shared_ptr<imperative::VarBase>(
+               new imperative::VarBase(tracer->GenerateUniqueName()));
+           imperative::NameVarBaseMap outs = {{"Out", {new_out}},
+                                              {"XShape", {out_xshape}}};
+           tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs));
+           return new_out;
+         }
        // the index is a list
......
@@ -3918,9 +3918,6 @@ void StridedSliceRawInferMeta(const MetaTensor& x,
        new_out_shape.push_back(out_dims[i]);
      }
    }
-   if (new_out_shape.size() == 0) {
-     new_out_shape.push_back(1);
-   }
    out_dims = phi::make_ddim(new_out_shape);
  }
  VLOG(4) << "out_dims: " << out_dims;
......
@@ -13,10 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include <glog/logging.h>
#include <paddle/phi/core/ddim.h>
#include <string>
#include <vector>
+#include "paddle/phi/core/flags.h"
+DECLARE_bool(set_to_1d);
namespace phi {

@@ -202,13 +205,11 @@ inline DDim GetDecreasedDims(const DDim slice_dims,
        new_shape.push_back(decreased_dims[i]);
      }
    }
-   // NOTE(liym27): Paddle does not support that the rank of Tensor is 0, and
-   // uses [1] instead.
-   if (new_shape.size() == 0) {
+   if (FLAGS_set_to_1d && new_shape.size() == 0) {
+     // NOTE(zoooo0820): Hack procssing to 1-D, when axes decrease to 0-D in
+     // slice. This will remove in release 2.6.
      new_shape.push_back(1);
    }
    decreased_dims = phi::make_ddim(new_shape);
  }
  return decreased_dims;
......
@@ -266,6 +266,11 @@ void SetValueGradImpl(const Context& dev_ctx,
        {fake_value_grad_dims.Get(), fake_value_grad_dims.size()},
        static_cast<T>(0));
    auto value_grad_dims_vec = phi::vectorize<int64_t>(value_grad_dims);
+   // for value is a 0-D Tensor
+   if (value_grad_dims.size() == 0) {
+     value_grad_dims_vec =
+         phi::vectorize<int64_t>(phi::make_ddim(std::vector<int>({1})));
+   }
    for (auto offset : offsets) {
      for (int i = 0; i < out_dims_size; i++) {
        slice_end[i] = offset[i] + fake_value_grad_dims[i];
......
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import paddle
from ..utils import compute_compatible_dim_mapping, is_dim_shard
from .common import (
    DistributedOperatorImpl,

@@ -70,9 +72,14 @@ class DistributedSliceImpl(DistributedOperatorImpl):
            if i not in decrease_axis:
                ref_indices.append(i)
        if ref_indices == []:
-           assert len(out_dims_mapping) == 1
-           if is_dim_shard(out_dims_mapping[0]):
-               return False
+           # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D
+           # with FLAGS_set_to_1d=True.
+           if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']:
+               assert len(out_dims_mapping) == 1
+               if is_dim_shard(out_dims_mapping[0]):
+                   return False
+           else:
+               assert len(out_dims_mapping) == 0
        else:
            for i in range(len(out_dims_mapping)):
                ref_index = ref_indices[i]

@@ -142,9 +149,12 @@
                ref_indices.append(i)
        if ref_dims_mapping == []:
-           ref_dims_mapping = [-1]
+           # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D
+           # with FLAGS_set_to_1d=True.
+           if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']:
+               ref_dims_mapping = [-1]
+               assert ref_dims_mapping[0] == out_dims_mapping[0]
            assert len(ref_dims_mapping) == len(out_dims_mapping)
-           assert ref_dims_mapping[0] == out_dims_mapping[0]
            changed = False
        else:
            assert len(ref_dims_mapping) == len(out_dims_mapping)
......
@@ -1371,7 +1371,7 @@ def fftshift(x, axes=None, name=None):
    elif isinstance(axes, int):
        shifts = shape[axes] // 2
    else:
-       shifts = paddle.concat([shape[ax] // 2 for ax in axes])
+       shifts = paddle.concat([shape[ax : ax + 1] // 2 for ax in axes])
    return paddle.roll(x, shifts, axes, name=name)

@@ -1416,7 +1416,7 @@ def ifftshift(x, axes=None, name=None):
    elif isinstance(axes, int):
        shifts = -shape[axes] // 2
    else:
-       shifts = paddle.concat([-shape[ax] // 2 for ax in axes])
+       shifts = paddle.concat([-shape[ax : ax + 1] // 2 for ax in axes])
    return paddle.roll(x, shifts, axes, name=name)
......
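The fftshift/ifftshift edit above replaces shape[ax] with shape[ax : ax + 1], presumably because integer indexing into the 1-D result of paddle.shape now decreases the axis and yields a 0-D value, while this code needs 1-D pieces to build the shifts vector; slicing keeps each piece 1-D. A small illustration (not from the commit; the shapes are made up):

import paddle

x = paddle.rand([4, 6])
shape = paddle.shape(x)            # 1-D int tensor: [4, 6]

print(shape[0].shape)              # [] -- integer indexing now removes the axis
print(shape[0:1].shape)            # [1] -- a slice keeps a 1-D piece

axes = [0, 1]
shifts = paddle.concat([shape[ax : ax + 1] // 2 for ax in axes])
print(shifts)                      # values [2, 3], shape [2]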
@@ -43,7 +43,7 @@ class TestImperativeNumpyBridge(unittest.TestCase):
        np.testing.assert_array_equal(var2.numpy(), data_np)
        data_np[0][0] = -1
        self.assertEqual(data_np[0][0], -1)
-       self.assertNotEqual(var2[0][0].numpy()[0], -1)
+       self.assertNotEqual(var2[0][0].numpy(), -1)
        self.assertFalse(np.array_equal(var2.numpy(), data_np))
......
@@ -140,16 +140,16 @@ class TestKthvalueOpWithNaN(unittest.TestCase):
            nan_position = 100
            self.x[0, nan_position, 2] = float('nan')
            v, inds = self.x.kthvalue(k=200, axis=1)
-           self.assertTrue(np.isnan(v[0, 2].numpy()[0]))
-           self.assertEqual(inds[0, 2].numpy()[0], nan_position)
+           self.assertTrue(np.isnan(v[0, 2].numpy()))
+           self.assertEqual(inds[0, 2].numpy(), nan_position)
        def test_nan_in_gpu_kernel():
            paddle.set_device('gpu')
            nan_position = 100
            self.x[0, nan_position, 2] = float('nan')
            v, inds = self.x.kthvalue(k=200, axis=1)
-           self.assertTrue(np.isnan(v[0, 2].numpy()[0]))
-           self.assertEqual(inds[0, 2].numpy()[0], nan_position)
+           self.assertTrue(np.isnan(v[0, 2].numpy()))
+           self.assertEqual(inds[0, 2].numpy(), nan_position)
        test_nan_in_cpu_kernel()
        if fluid.core.is_compiled_with_cuda():
......
@@ -1590,7 +1590,7 @@ class TestSetValueInplace(unittest.TestCase):
        a.stop_gradient = False
        b = a[:]
        c = b
-       b[paddle.to_tensor(0)] = 1.0
+       b[paddle.zeros([], dtype='int32')] = 1.0
        self.assertTrue(id(b) == id(c))
        np.testing.assert_array_equal(b.numpy(), c.numpy())
......
@@ -541,8 +541,8 @@ class TestSliceAPI(unittest.TestCase):
    def test_1(self):
        with paddle_static_guard():
            input = np.random.random([3, 4, 5, 6]).astype("float64")
-           minus_1 = paddle.tensor.fill_constant([1], "int32", -1)
-           minus_3 = paddle.tensor.fill_constant([1], "int64", -3)
+           minus_1 = paddle.tensor.fill_constant([], "int32", -1)
+           minus_3 = paddle.tensor.fill_constant([], "int64", -3)
            starts = paddle.static.data(
                name='starts', shape=[1, 3], dtype="float32"
            )
......
@@ -604,8 +604,7 @@ class TestVarBase(unittest.TestCase):
            nw = w[1, 1, 1]
-           self.assertEqual(len(nw.shape), 1)
-           self.assertEqual(nw.shape[0], 1)
+           self.assertEqual(len(nw.shape), 0)
            nw = w[:, :, :-1]
            self.assertEqual((784, 100, 99), tuple(nw.shape))

@@ -705,10 +704,10 @@
            var = paddle.to_tensor(tensor_array)
-           one = paddle.ones(shape=[1], dtype="int32")
-           two = paddle.full(shape=[1], fill_value=2, dtype="int32")
-           negative_one = paddle.full(shape=[1], fill_value=-1, dtype="int32")
-           four = paddle.full(shape=[1], fill_value=4, dtype="int32")
+           one = paddle.ones(shape=[], dtype="int32")
+           two = paddle.full(shape=[], fill_value=2, dtype="int32")
+           negative_one = paddle.full(shape=[], fill_value=-1, dtype="int32")
+           four = paddle.full(shape=[], fill_value=4, dtype="int32")
            var = fluid.dygraph.to_variable(tensor_array)
            var1 = var[0, one, one]
......
@@ -132,8 +132,7 @@ class TestVariable(unittest.TestCase):
            nw = w[1, 1, 1]
-           self.assertEqual(len(nw.shape), 1)
-           self.assertEqual(nw.shape[0], 1)
+           self.assertEqual(len(nw.shape), 0)
            nw = w[:, :, :-1]
            self.assertEqual((784, 100, 99), nw.shape)
......
@@ -192,9 +192,9 @@ class TestOutputsMustExistsInputs(unittest.TestCase):
        with fluid.program_guard(main_program, startup_program):
            def func(x):
-               s = paddle.zeros([1])
-               i = paddle.ones([1])
-               max_len = paddle.shape(x)[0]
+               s = paddle.zeros([])
+               i = paddle.ones([])
+               max_len = paddle.shape(x)
                def cond(i, s, x):
                    return i < max_len
......
@@ -666,6 +666,140 @@ class TestSundryAPI(unittest.TestCase):
        self.assertEqual(zero_dim_var.shape, [])
        self.assertEqual(zero_dim_var.item(), 0.5)
def test_getitem(self):
# case1: When all axis have a scalar indice, output should be a 0-d Tensor;
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x[1, 2, 3, 4]
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
np.testing.assert_allclose(out, np.array(119))
self.assertEqual(out.grad.shape, [])
np.testing.assert_allclose(out.grad, 1.0)
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.zeros((2, 3, 4, 5))
x_grad_expected[1, 2, 3, 4] = 1.0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case2: When one axis has a 0-d Tensor indice, the output should be same as int indice.
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out1 = x[1, 2]
out2 = x[
paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32')
]
np.testing.assert_allclose(out1, out2)
# case3: When all axis have a scalar indice (i.e. case1) and has None indice,
# ndim of output should be same with numbers of None.
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out1 = x[1, 2, None, 3, 4]
self.assertEqual(out1.shape, [1])
np.testing.assert_allclose(out1, np.array([119]))
out2 = x[1, None, 2, None, 3, 4]
self.assertEqual(out2.shape, [1, 1])
np.testing.assert_allclose(out2, np.array([[119]]))
# case4: 1-D Tensor will be treated as vector, no axis decrease will happen.
x = paddle.ones((2, 3, 4))
indice = paddle.ones([1], dtype='int32')
out1 = x[indice]
self.assertEqual(out1.shape, [1, 3, 4])
np.testing.assert_allclose(out1, np.ones((1, 3, 4)))
out2 = x[indice, indice]
self.assertEqual(out2.shape, [1, 4])
np.testing.assert_allclose(out2, np.ones((1, 4)))
def test_setitem(self):
# case1: all axis have a scalar indice
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x * 2
out[1, 2, 3, 4] = 10
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10))
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.ones((2, 3, 4, 5)) * 2
x_grad_expected[1, 2, 3, 4] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case2: 0-D Tensor indice in some axis
# NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be
# treated as combined indexing, which is not support backward.
# There should have more test cases such as out[1, indice, :] = 0.5 when this
# problem is fixed.
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice, indice] = 0.5
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1, 1] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case3:0-D Tensor indice in some axis, value is a Tensor
# and there is broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones((4, 5), dtype='float32') * 5
v.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones((4, 5)) * 3
np.testing.assert_allclose(v.grad, value_grad_expected)
# case4: value is a 0-D tensor and there is broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones([], dtype='float32') * 5
v.stop_gradient = False
out = x * 1
indice = paddle.full([], 0, dtype='int32')
out[indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
self.assertEqual(v.grad.shape, [])
np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[0] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones(()) * 3 * 4 * 5
np.testing.assert_allclose(v.grad, value_grad_expected)
# case5: indice / value is 0-D Tensor, and there is no broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones([], dtype='float32') * 2
v.stop_gradient = False
out = x * 1
indice = paddle.full([], 0, dtype='int32')
out[indice, indice, indice, indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
self.assertEqual(v.grad.shape, [])
np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[0, 0, 0, 0] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones(())
np.testing.assert_allclose(v.grad, value_grad_expected)
    def test_expand(self):
        # case1
        x = paddle.full([], 1, 'float32')

@@ -2456,6 +2590,118 @@ class TestSundryAPIStatic(unittest.TestCase):
        self.assertEqual(res[0].shape, ())
        self.assertEqual(res[0], 0.5)
@prog_scope()
def test_getitem(self):
# case1: When all axis have a scalar indice, output should be a 0-d Tensor;
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x[1, 2, 3, 4]
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name])
self.assertEqual(res[0].shape, ())
np.testing.assert_allclose(res[0], np.array(119))
self.assertEqual(res[2].shape, ())
np.testing.assert_allclose(res[2], 1.0)
self.assertEqual(res[1].shape, (2, 3, 4, 5))
x_grad_expected = np.zeros((2, 3, 4, 5))
x_grad_expected[1, 2, 3, 4] = 1.0
np.testing.assert_allclose(res[1], x_grad_expected)
# case2: When one axis has a 0-d Tensor indice, the output should be same as int indice.
x2 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out1 = x2[1, 2]
out2 = x2[
paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32')
]
res = self.exe.run(prog, fetch_list=[out1, out2])
np.testing.assert_allclose(res[0], res[1])
# case3: When all axis have a scalar indice (i.e. case1) and has None indice,
# ndim of output should be same with numbers of None.
x3 = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out3 = x3[1, 2, None, 3, 4]
out4 = x3[1, None, 2, None, 3, 4]
res = self.exe.run(prog, fetch_list=[out3, out4])
self.assertEqual(res[0].shape, (1,))
np.testing.assert_allclose(res[0], np.array([119]))
self.assertEqual(res[1].shape, (1, 1))
np.testing.assert_allclose(res[1], np.array([[119]]))
# case4: 1-D Tensor will be treated as vector, no axis decrease will happen.
x4 = paddle.ones((2, 3, 4))
indice = paddle.ones([1], dtype='int32')
out5 = x4[indice]
out6 = x4[indice, indice]
res = self.exe.run(prog, fetch_list=[out5, out6])
self.assertEqual(res[0].shape, (1, 3, 4))
np.testing.assert_allclose(res[0], np.ones((1, 3, 4)))
self.assertEqual(res[1].shape, (1, 4))
np.testing.assert_allclose(res[1], np.ones((1, 4)))
@prog_scope()
def test_setitem(self):
# NOTE(zoooo0820): __setitem__ has gradient problem in static graph.
# To solve this, we may not support __setitem__ in static graph.
# These unit tests will delete soon.
# case1: all axis have a scalar indice
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x * 2
out[1, 2, 3, 4] = 10
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name])
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(res[0][1, 2, 3, 4], np.array(10))
self.assertEqual(res[1].shape, (2, 3, 4, 5))
x_grad_expected = np.ones((2, 3, 4, 5)) * 2
x_grad_expected[1, 2, 3, 4] = 0
np.testing.assert_allclose(res[1], x_grad_expected)
# case2: 0-D Tensor indice in some axis
# NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be
# treated as combined indexing, which is not support backward.
# There should have more test cases such as out[1, indice, :] = 0.5 when this
# problem is fixed.
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice, indice] = 0.5
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name])
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(res[0][1, 1], np.ones((4, 5)) * 0.5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1, 1] = 0
np.testing.assert_allclose(res[1], x_grad_expected)
# case3:0-D Tensor indice in some axis, value is a Tensor
# and there is broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones((4, 5), dtype='float32') * 5
v.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice] = v
paddle.static.append_backward(out.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out, x.grad_name, v.grad_name])
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(res[0][1], np.ones((3, 4, 5)) * 5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1] = 0
np.testing.assert_allclose(res[1], x_grad_expected)
    @prog_scope()
    def test_expand(self):
        x = paddle.full([], 1, 'float32')
......
@@ -17,6 +17,7 @@ import numpy as np
from . import unique_name
from . import core
import paddle
+import warnings
MAX_INTEGER = 2**31 - 1

@@ -185,7 +186,8 @@ class SliceInfo:
        for i in range(len(gather_tensor_shape)):
            if not (
-               value_dims_bd[i] == gather_tensor_shape[i]
+               len(value_dims_bd) == 0
+               or value_dims_bd[i] == gather_tensor_shape[i]
                or value_dims_bd[i] == 1
            ):
                raise ValueError(

@@ -282,7 +284,16 @@ def is_integer_or_scalar_tensor(ele):
    if isinstance(ele, int):
        return True
    elif isinstance(ele, Variable):
-       if len(ele.shape) == 1 and ele.shape[0] == 1:
+       # NOTE(zoooo0820): For compatibility, if FLAGS_set_to_1d is set to True,
+       # 1-D tensor is still treated as a scalar, which means basic indexing.
+       # This will be removed in future.
+       if paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']:
+           if len(ele.shape) == 1 and ele.shape[0] == 1:
+               warnings.warn(
+                   "1-D Tensor will be treat as advanced indexing in future version. Currently, 1-D Tensor means a scalar, not vector, and please modify it to 0-D Tensor. If advanced indexing is needed, please use `export FLAGS_set_to_1d=False` to set the flag."
+               )
+               return True
+       if len(ele.shape) == 0:
            return True
    return False

@@ -573,13 +584,14 @@ def _getitem_impl_(var, item):
        out = reverse(out, axis=reverse_axes)
-   # Deal with cases when all axes are decreased.
-   # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar.
-   # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased.
-   # For example:
-   # # x.shape: (2,3,4)
-   # out = x[0, 1, 1, None] # out.shape : (1)
-   if len(decrease_axes) == len(var.shape):
+   # NOTE(zoooo0820): When all axes are decreased, the output will be 1-D
+   # with FLAGS_set_to_1d=True. In this case, one `None` should be pop out,
+   # otherwise the output shape will be not correct.
+   set_to_1d = paddle.get_flags('FLAGS_set_to_1d')['FLAGS_set_to_1d']
+   if set_to_1d and len(decrease_axes) == len(var.shape):
+       warnings.warn(
+           "Warning: In Tensor '__getitem__', if the number of scalar elements in the index is equal to the rank of the Tensor, the output should be 0-D. In order to be consistent with the behavior of previous versions, it will be processed to 1-D. But it is not correct and will be removed in release 2.6. If 1-D is still wanted, please modify the index element from scalar to slice (e.g. 'x[i]' => 'x[i:i+1]')."
+       )
        none_axes = none_axes[1:]
    if len(none_axes) > 0:

@@ -592,13 +604,6 @@ def _getitem_impl_(var, item):
                new_axis = axis - l
                none_axes[idx] = new_axis
-           # Deal with cases when all axes are decreased.
-           # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar.
-           # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased.
-           # For example:
-           # # x.shape: (2,3,4)
-           # out = x[0, 1, 1, None] # out.shape : (1)
            from ..tensor import unsqueeze
            out = unsqueeze(out, axis=none_axes)
......
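The rule encoded in is_integer_or_scalar_tensor above is the heart of the indexing change: a 0-D Tensor index now decreases the axis just like a Python int, while a 1-D Tensor index is kept for advanced indexing (unless the FLAGS_set_to_1d compatibility path still maps a shape-[1] index to a scalar, with a warning). A short illustration mirroring case 4 of the new tests (not part of the commit):

import paddle

x = paddle.ones((2, 3, 4))

scalar_idx = paddle.full([], 1, dtype='int32')   # 0-D index: basic indexing
vector_idx = paddle.ones([1], dtype='int32')     # 1-D index: advanced indexing

print(x[scalar_idx].shape)   # [3, 4]    -- axis 0 is decreased, same as x[1]
print(x[vector_idx].shape)   # [1, 3, 4] -- axis 0 is kept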
@@ -125,9 +125,7 @@ def minimize_lbfgs(
    is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool')
    num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64')
-   history_size = paddle.full(
-       shape=[1], fill_value=history_size, dtype='int64'
-   )
+   history_size = paddle.full(shape=[], fill_value=history_size, dtype='int64')
    head = paddle.full(shape=[1], fill_value=1, dtype='int64')
    tail = paddle.full(shape=[1], fill_value=0, dtype='int64')

@@ -177,7 +175,7 @@ def minimize_lbfgs(
        q = paddle.assign(g1)
        # In a array circle, the index may out of range, so must use mod.
        i = paddle.full(
-           shape=[1], fill_value=(head - 1).mod(history_size), dtype='int64'
+           shape=[], fill_value=(head - 1).mod(history_size), dtype='int64'
        )
        def cond(i, q):

@@ -193,7 +191,7 @@
        r = paddle.matmul(H0, q)
-       i = paddle.full(shape=[1], fill_value=tail + 1, dtype='int64')
+       i = paddle.full(shape=[], fill_value=tail + 1, dtype='int64')
        def cond(i, r):
            return i != head
......
@@ -51,11 +51,11 @@ def to_static_variable(x):
    Translate a Python Tensor to PaddlePaddle static graph Tensor
    '''
    if isinstance(x, bool):
-       return paddle.full(shape=[1], dtype='bool', fill_value=x)
+       return paddle.full(shape=[], dtype='bool', fill_value=x)
    if isinstance(x, float):
-       return paddle.full(shape=[1], dtype='float64', fill_value=x)
+       return paddle.full(shape=[], dtype='float64', fill_value=x)
    if isinstance(x, int):
-       return paddle.full(shape=[1], dtype='int64', fill_value=x)
+       return paddle.full(shape=[], dtype='int64', fill_value=x)
    if isinstance(x, UndefinedVar) or x is None:
        """
        for early return case, we need a variable to represent None, current we use data_layer_not_check.
......
@@ -271,7 +271,7 @@ def _rnn_static_graph(
    mask = paddle.reverse(mask, axis=[0]) if sequence_length else None
    with paddle.fluid.framework.device_guard("cpu"):
-       start_i = paddle.zeros([1], dtype="int64")
+       start_i = paddle.zeros([], dtype="int64")
        end = max_seq_len
        end = paddle.cast(end, "int64")
......
@@ -3169,19 +3169,19 @@ def tile(x, repeat_times, name=None):
        )
        if isinstance(repeat_times, Variable):
            assert (
-               len(repeat_times.shape) == 1
-           ), 'repeat_times must be an 1-D Tensor.'
+               repeat_times.numel() == 1
+           ), 'repeat_times must be a Tensor with one element.'
        else:
            for elem in repeat_times:
                if isinstance(elem, Variable):
                    assert (
-                       len(elem.shape) == 1
-                   ), 'Elements in repeat_times must be 1-D Tensors or integers.'
+                       elem.numel() == 1
+                   ), 'Elements in repeat_times must be Tensor with one element or integers.'
                else:
                    type_tuple = (int, np.int32, np.int64)
                    assert isinstance(
                        elem, type_tuple
-                   ), 'Elements in repeat_times must be 1-D Tensors or integers.'
+                   ), 'Elements in repeat_times must be Tensor with one element or integers.'
        check_variable_and_dtype(
            x,

@@ -3425,18 +3425,18 @@ def expand(x, shape, name=None):
        return _C_ops.expand(x, shape)
    else:
        if isinstance(shape, Variable):
-           assert len(shape.shape) == 1, 'shape must be an 1-D Tensor.'
+           assert shape.numel() == 1, 'shape must be a Tensor with one element'
        else:
            for elem in shape:
                if isinstance(elem, Variable):
                    assert (
-                       len(elem.shape) == 1
-                   ), 'Elements in shape must be 1-D Tensors or integers.'
+                       elem.numel() == 1
+                   ), 'Elements in shape must be Tensor with one element or integers.'
                else:
                    type_tuple = (int, np.int32, np.int64)
                    assert isinstance(
                        elem, type_tuple
-                   ), 'Elements in shape must be 1-D Tensors or integers.'
+                   ), 'Elements in shape must be Tensor with one element or integers.'
        check_variable_and_dtype(
            x,
......
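The tile/expand assertions above move from a rank test to an element-count test because a 0-D Tensor has rank 0 but still holds exactly one element, so the old len(t.shape) == 1 check would reject it even though it is a valid scalar entry. A quick check of that distinction (illustrative only, not from the commit):

import paddle

t0 = paddle.full([], 3, dtype='int32')    # 0-D Tensor
t1 = paddle.full([1], 3, dtype='int32')   # shape-[1] Tensor

print(len(t0.shape), int(t0.numel()))     # 0 1  -> fails the old rank check, passes numel() == 1
print(len(t1.shape), int(t1.numel()))     # 1 1  -> passes both checks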
@@ -364,7 +364,7 @@ class TestListWithCondGradInferVarType(unittest.TestCase):
        x = paddle.to_tensor([2, 3, 4], dtype='float32')
        index = paddle.to_tensor([1])
        res = net(x, index)
-       self.assertEqual(res[0], 48.0)
+       self.assertEqual(res, 48.0)
if __name__ == '__main__':
......
@@ -1432,7 +1432,7 @@ class XPUTestSetValueOp(XPUOpTestWrapper):
        a.stop_gradient = False
        b = a[:]
        c = b
-       b[paddle.to_tensor(0)] = 1.0
+       b[paddle.zeros([], dtype='int32')] = 1.0
        self.assertTrue(id(b) == id(c))
        np.testing.assert_array_equal(b.numpy(), c.numpy())
......
@@ -166,7 +166,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper):
            self.starts = [0, 1, 2, 3]
            self.ends = [1, 2, 3, 4]
            self.axes = [0, 1, 2, 3]
-           self.decrease_axis = [0, 1, 2, 3]
+           self.decrease_axis = [0, 1, 2]
            self.infer_flags = [1, 1, 1]
            self.out = self.input[0, 1, 2, 3:4]

@@ -188,7 +188,7 @@
            self.axes = [0, 1, 2, 3]
            self.decrease_axis = [0, 1, 2, 3]
            self.infer_flags = [1, 1, 1]
-           self.out = self.input[0, 1, 2, 3:4]
+           self.out = self.input[0, 1, 2, 3]
support_types = get_xpu_op_support_types('slice')
......
@@ -344,6 +344,140 @@ class TestSundryAPI(unittest.TestCase):
        paddle.disable_static()
        self.x = paddle.rand([])
def test_getitem(self):
# case1: When all axis have a scalar indice, output should be a 0-d Tensor;
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x[1, 2, 3, 4]
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
np.testing.assert_allclose(out, np.array(119))
self.assertEqual(out.grad.shape, [])
np.testing.assert_allclose(out.grad, 1.0)
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.zeros((2, 3, 4, 5))
x_grad_expected[1, 2, 3, 4] = 1.0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case2: When one axis has a 0-d Tensor indice, the output should be same as int indice.
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out1 = x[1, 2]
out2 = x[
paddle.full([], 1, dtype='int32'), paddle.full([], 2, dtype='int32')
]
np.testing.assert_allclose(out1, out2)
# case3: When all axis have a scalar indice (i.e. case1) and has None indice,
# ndim of output should be same with numbers of None.
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
out1 = x[1, 2, None, 3, 4]
self.assertEqual(out1.shape, [1])
np.testing.assert_allclose(out1, np.array([119]))
out2 = x[1, None, 2, None, 3, 4]
self.assertEqual(out2.shape, [1, 1])
np.testing.assert_allclose(out2, np.array([[119]]))
# case4: 1-D Tensor will be treated as vector, no axis decrease will happen.
x = paddle.ones((2, 3, 4))
indice = paddle.ones([1], dtype='int32')
out1 = x[indice]
self.assertEqual(out1.shape, [1, 3, 4])
np.testing.assert_allclose(out1, np.ones((1, 3, 4)))
out2 = x[indice, indice]
self.assertEqual(out2.shape, [1, 4])
np.testing.assert_allclose(out2, np.ones((1, 4)))
def test_setitem(self):
# case1: all axis have a scalar indice
x = paddle.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
x.stop_gradient = False
out = x * 2
out[1, 2, 3, 4] = 10
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10))
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.ones((2, 3, 4, 5)) * 2
x_grad_expected[1, 2, 3, 4] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case2: 0-D Tensor indice in some axis
# NOTE(zoooo0820): Now, int/slice with 0-D Tensor will still be
# treated as combined indexing, which is not support backward.
# There should have more test cases such as out[1, indice, :] = 0.5 when this
# problem is fixed.
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice, indice] = 0.5
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1, 1] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
# case3:0-D Tensor indice in some axis, value is a Tensor
# and there is broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones((4, 5), dtype='float32') * 5
v.stop_gradient = False
indice = paddle.full([], 1, dtype='int32')
out = x * 1
out[indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1], np.ones((3, 4, 5)) * 5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[1] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones((4, 5)) * 3
np.testing.assert_allclose(v.grad, value_grad_expected)
# case4: value is a 0-D tensor and there is broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones([], dtype='float32') * 5
v.stop_gradient = False
out = x * 1
indice = paddle.full([], 0, dtype='int32')
out[indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
self.assertEqual(v.grad.shape, [])
np.testing.assert_allclose(out[0], np.ones((3, 4, 5)) * 5)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[0] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones(()) * 3 * 4 * 5
np.testing.assert_allclose(v.grad, value_grad_expected)
# case5: indice / value is 0-D Tensor, and there is no broadcast
x = paddle.randn((2, 3, 4, 5))
x.stop_gradient = False
v = paddle.ones([], dtype='float32') * 2
v.stop_gradient = False
out = x * 1
indice = paddle.full([], 0, dtype='int32')
out[indice, indice, indice, indice] = v
out.backward()
self.assertEqual(out.shape, x.shape)
self.assertEqual(v.grad.shape, [])
np.testing.assert_allclose(out[0, 0, 0, 0], np.ones(()) * 2)
x_grad_expected = np.ones((2, 3, 4, 5))
x_grad_expected[0, 0, 0, 0] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
value_grad_expected = np.ones(())
np.testing.assert_allclose(v.grad, value_grad_expected)
    def test_expand(self):
        # case1
        x = paddle.full([], 1, 'float32')
......