diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
index cb9bacb48d7c061b7caba76056fcb0ee705ab5f2..72bb45da7ec9d9f10d7e98aa8bb6daf469995264 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
@@ -16,12 +16,10 @@
 import sys
 import unittest
+from functools import reduce
 
 import numpy as np
 
 
-# from functools import reduce
-
-
 sys.path.append("../")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
@@ -31,8 +29,7 @@ from xpu.get_test_cover_info import (
 )
 
 import paddle
-
-# from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.layer_helper import LayerHelper
 
 
 class XPUTestSetValueOp(XPUOpTestWrapper):
@@ -927,6 +924,562 @@ class XPUTestSetValueOp(XPUOpTestWrapper):
         self._bool_tensor_error()
         self._broadcast_mismatch()
 
+    # 5. Test backward
+    class XPUTestBackward(XPUOpTest):
+        def setUp(self):
+            self.__class__.op_type = "set_value"
+            self.__class__.no_need_check_grad = True
+            self.place = paddle.XPUPlace(0)
+
+        def test_static(self):
+            paddle.enable_static()
+            main_program = paddle.static.Program()
+            startup_program = paddle.static.Program()
+
+            x_np = np.random.random(size=(4, 4)).astype('float32')
+            y_np = np.random.random(size=(4, 4)).astype('float32')
+            label_np = np.random.randint(2, size=(4, 1)).astype('int64')
+
+            with paddle.static.program_guard(main_program, startup_program):
+                x = paddle.static.data(name="x", shape=[4, 4], dtype='float32')
+                y = paddle.static.data(name="y", shape=[4, 4], dtype='float32')
+
+                label = paddle.static.data(
+                    name="label", shape=[4, 1], dtype='int64'
+                )
+
+                z = paddle.add(x, y)
+                var = y[0, :]
+                z[0, :] = var
+
+                prediction = paddle.static.nn.fc(
+                    x=z, size=2, activation='softmax'
+                )
+
+                cost = paddle.nn.functional.cross_entropy(
+                    input=prediction, label=label
+                )
+                loss = paddle.mean(cost)
+                sgd = paddle.optimizer.SGD(learning_rate=0.01)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor(self.place)
+            exe.run(startup_program)
+
+            var_grad, z_grad = exe.run(
+                main_program,
+                feed={"x": x_np, "y": y_np, "label": label_np},
+                fetch_list=[var.name + "@GRAD", z.name + "@GRAD"],
+            )
+
+            self.assertTrue((var_grad == z_grad[0, :]).all())
+            paddle.disable_static()
+
+    class XPUTestGradientTruncated(XPUOpTest):
+        def setUp(self):
+            self.__class__.op_type = "set_value"
+            self.__class__.no_need_check_grad = True
+            self.place = paddle.XPUPlace(0)
+
+        def test_consistent_with_competitor(self):
+            paddle.disable_static()
+
+            def set_value(t, value):
+                a = t * t
+                a[0, 1] = value
+                y = a * a
+                return y.sum()
+
+            # case 1
+            array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
+                [1, 2, 1, 3, 1, 4]
+            )
+            value = np.arange(100, 104, dtype="float32").reshape(1, 4)
+
+            inps = paddle.to_tensor(array, stop_gradient=False)
+            value = paddle.to_tensor(value, stop_gradient=False)
+
+            loss = set_value(inps, value)
+            loss.backward()
+
+            value_grad = np.array([[600.0, 606.0, 612.0, 618.0]])
+            input_grad = np.array(
+                [
+                    [
+                        [
+                            [
+                                [[4.0, 32.0, 108.0, 256.0]],
+                                [[500.0, 864.0, 1372.0, 2048.0]],
+                                [[2916.0, 4000.0, 5324.0, 6912.0]],
+                            ]
+                        ],
+                        [
+                            [
+                                [[0.0, 0.0, 0.0, 0.0]],
+                                [[0.0, 0.0, 0.0, 0.0]],
+                                [[0.0, 0.0, 0.0, 0.0]],
+                            ]
+                        ],
+                    ]
+                ]
+            )
+            np.testing.assert_array_equal(
+                inps.grad.numpy(),
+                input_grad,
+                err_msg='The gradient of input should be \n{},\n but received {}'.format(
+                    input_grad, inps.grad.numpy()
+                ),
+            )
+            np.testing.assert_array_equal(
+                value.grad.numpy(),
+                value_grad,
+                err_msg='The gradient of value should be \n{},\n but received {}'.format(
+                    value_grad, value.grad.numpy()
+                ),
+            )
+
+            # case 2
+            array = np.arange(1, 2 * 3 * 4 + 1, dtype="float32").reshape(
+                [4, 2, 3]
+            )
+            value = np.arange(100, 100 + 1, dtype="float32")
+
+            inps2 = paddle.to_tensor(array, stop_gradient=False)
+            value2 = paddle.to_tensor(value, stop_gradient=False)
+
+            loss = set_value(inps2, value2)
+            loss.backward()
+
+            value_grad2 = np.array([600.0])
+            input_grad2 = np.array(
+                [
+                    [[4.0, 32.0, 108.0], [0.0, 0.0, 0.0]],
+                    [[1372.0, 2048.0, 2916.0], [4000.0, 5324.0, 6912.0]],
+                    [[8788.0, 10976.0, 13500.0], [16384.0, 19652.0, 23328.0]],
+                    [[27436.0, 32000.0, 37044.0], [42592.0, 48668.0, 55296.0]],
+                ]
+            )
+            np.testing.assert_array_equal(
+                inps2.grad.numpy(),
+                input_grad2,
+                err_msg='The gradient of input should be \n{},\n but received {}'.format(
+                    input_grad2, inps2.grad.numpy()
+                ),
+            )
+            np.testing.assert_array_equal(
+                value2.grad.numpy(),
+                value_grad2,
+                err_msg='The gradient of value should be \n{},\n but received {}'.format(
+                    value_grad2, value2.grad.numpy()
+                ),
+            )
+
+            # case 3
+            def set_value3(t, value):
+                a = t * t
+                a[0, :, 0, :] = value
+                y = a * a
+                return y.sum()
+
+            array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
+                [4, 3, 1, 1, 2, 1]
+            )
+            value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1)
+
+            inps = paddle.to_tensor(array, stop_gradient=False)
+            value = paddle.to_tensor(value, stop_gradient=False)
+
+            loss = set_value3(inps, value)
+            loss.backward()
+
+            value_grad = np.array([[[600.0], [606.0]]])
+            input_grad = np.array(
+                [
+                    [
+                        [[[[0.0], [0.0]]]],
+                        [[[[0.0], [0.0]]]],
+                        [[[[0.0], [0.0]]]],
+                    ],
+                    [
+                        [[[[1372.0], [2048.0]]]],
+                        [[[[2916.0], [4000.0]]]],
+                        [[[[5324.0], [6912.0]]]],
+                    ],
+                    [
+                        [[[[8788.0], [10976.0]]]],
+                        [[[[13500.0], [16384.0]]]],
+                        [[[[19652.0], [23328.0]]]],
+                    ],
+                    [
+                        [[[[27436.0], [32000.0]]]],
+                        [[[[37044.0], [42592.0]]]],
+                        [[[[48668.0], [55296.0]]]],
+                    ],
+                ]
+            )
+            np.testing.assert_array_equal(
+                inps.grad.numpy(),
+                input_grad,
+                err_msg='The gradient of input should be \n{},\n but received {}'.format(
+                    input_grad, inps.grad.numpy()
+                ),
+            )
+            np.testing.assert_array_equal(
+                value.grad.numpy(),
+                value_grad,
+                err_msg='The gradient of value should be \n{},\n but received {}'.format(
+                    value_grad, value.grad.numpy()
+                ),
+            )
+
+            # case 4: step > 0
+            def set_value4(t, value):
+                a = t * t
+                a[0, :, 0, ::3] = value
+                y = a * a
+                return y.sum()
+
+            array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
+                [2, 3, 1, 4, 1]
+            )
+            value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1)
+
+            inps = paddle.to_tensor(array, stop_gradient=False)
+            value = paddle.to_tensor(value, stop_gradient=False)
+
+            loss = set_value4(inps, value)
+            loss.backward()
+
+            value_grad = np.array([[[600.0], [606.0]]])
+            input_grad = np.array(
+                [
+                    [
+                        [[[0.0], [32.0], [108.0], [0.0]]],
+                        [[[0.0], [864.0], [1372.0], [0.0]]],
+                        [[[0.0], [4000.0], [5324.0], [0.0]]],
+                    ],
+                    [
+                        [[[8788.0], [10976.0], [13500.0], [16384.0]]],
+                        [[[19652.0], [23328.0], [27436.0], [32000.0]]],
+                        [[[37044.0], [42592.0], [48668.0], [55296.0]]],
+                    ],
+                ]
+            )
+            np.testing.assert_array_equal(
+                inps.grad.numpy(),
+                input_grad,
+                err_msg='The gradient of input should be \n{},\n but received {}'.format(
+                    input_grad, inps.grad.numpy()
+                ),
+            )
+            np.testing.assert_array_equal(
+                value.grad.numpy(),
+                value_grad,
+                err_msg='The gradient of value should be \n{},\n but received {}'.format(
+                    value_grad, value.grad.numpy()
+                ),
+            )
+
+            # case 5: a[0].shape == value.shape
+            def set_value5(t, value):
+                a = t * t
+                a[0] = value
+                y = a * a
+                return y.sum()
+
+            array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
+                [2, 3, 4]
+            )
+            value = np.arange(100, 100 + 12, dtype="float32").reshape(3, 4)
+
+            inps = paddle.to_tensor(array, stop_gradient=False)
+            value = paddle.to_tensor(value, stop_gradient=False)
+
+            loss = set_value5(inps, value)
+            loss.backward()
+
+            value_grad = np.array(
+                [
+                    [200.0, 202.0, 204.0, 206.0],
+                    [208.0, 210.0, 212.0, 214.0],
+                    [216.0, 218.0, 220.0, 222.0],
+                ]
+            )
+            input_grad = np.array(
+                [
+                    [
+                        [0.0, 0.0, 0.0, 0.0],
+                        [0.0, 0.0, 0.0, 0.0],
+                        [0.0, 0.0, 0.0, 0.0],
+                    ],
+                    [
+                        [8788.0, 10976.0, 13500.0, 16384.0],
+                        [19652.0, 23328.0, 27436.0, 32000.0],
+                        [37044.0, 42592.0, 48668.0, 55296.0],
+                    ],
+                ]
+            )
+            np.testing.assert_array_equal(
+                inps.grad.numpy(),
+                input_grad,
+                err_msg='The gradient of input should be \n{},\n but received {}'.format(
+                    input_grad, inps.grad.numpy()
+                ),
+            )
+            np.testing.assert_array_equal(
+                value.grad.numpy(),
+                value_grad,
+                err_msg='The gradient of value should be \n{},\n but received {}'.format(
+                    value_grad, value.grad.numpy()
+                ),
+            )
+
+            # case 6: pass stop_gradient from value to x
+            x = paddle.zeros([8, 8], dtype='float32')
+            value = paddle.to_tensor([10], dtype='float32', stop_gradient=False)
+
+            self.assertTrue(x.stop_gradient)
+            self.assertTrue(x.is_leaf)
+
+            x[0, :] = value
+
+            self.assertTrue(not x.stop_gradient)
+            self.assertTrue(not x.is_leaf)
+
+        def test_static_graph(self):
+            paddle.enable_static()
+
+            to_string = lambda x, i: x + '_' + str(i)
+            numel = lambda input_shape: reduce(lambda x, y: x * y, input_shape)
+
+            def op1(x):
+                value = paddle.fluid.layers.fill_constant([1], "float32", 1)
+                # test stop_gradient
+                value.stop_gradient = True
+                x.stop_gradient = False
+                start = paddle.fluid.layers.fill_constant(
+                    [1], "int32", 5, force_cpu=True
+                )
+                end = paddle.fluid.layers.fill_constant(
+                    [1], "int32", 0, force_cpu=True
+                )
+                step = paddle.fluid.layers.fill_constant(
+                    [1], "int32", -2, force_cpu=True
+                )
+
+                inputs = {
+                    'Input': x,
+                    'ValueTensor': value,
+                    'StartsTensorList': [
+                        start,
+                    ],
+                    'EndsTensorList': [
+                        end,
+                    ],
+                    'StepsTensorList': [
+                        step,
+                    ],
+                }
+
+                helper = LayerHelper("set_value")
+                y = helper.create_variable_for_type_inference(dtype=x.dtype)
+
+                helper.append_op(
+                    type="set_value",
+                    inputs=inputs,
+                    outputs={'Out': y},
+                    attrs={'axes': [0]},
+                )
+
+                return y, value
+
+            def op2(x):
+                value = paddle.fluid.layers.fill_constant(
+                    [1, 3, 2], "float32", 1
+                )
+                # test stop_gradient
+                value.stop_gradient = False
+                x.stop_gradient = False
+                attrs = {
+                    'axes': [0],
+                    'starts': [6],
+                    'ends': [0],
+                    'steps': [-4],
+                    'decrease_axes': [],
+                    'none_axes': [],
+                    'dtype': paddle.float32,
+                }
+                inputs = {'Input': x, 'ValueTensor': value}
+
+                helper = LayerHelper("set_value")
+                y = helper.create_variable_for_type_inference(dtype=x.dtype)
+
+                helper.append_op(
+                    type="set_value",
+                    inputs=inputs,
+                    outputs={'Out': y},
+                    attrs=attrs,
+                )
+
+                return y, value
+
+            def op3(x):
+                value = paddle.fluid.layers.fill_constant([1], "float32", 1)
+                x.stop_gradient = True
+                value.stop_gradient = False
+                start = paddle.fluid.layers.fill_constant(
+                    [1], "int32", 0, force_cpu=True
+                )
+                end = paddle.fluid.layers.fill_constant(
+                    [1], "int32", 5, force_cpu=True
+                )
+                step = paddle.fluid.layers.fill_constant(
+                    [1], "int32", 3, force_cpu=True
+                )
+
+                inputs = {
+                    'Input': x,
+                    'ValueTensor': value,
+                    'StartsTensorList': [
+                        start,
+                    ],
+                    'EndsTensorList': [
+                        end,
+                    ],
+                    'StepsTensorList': [
+                        step,
+                    ],
+                }
+
+                helper = LayerHelper("set_value")
+                y = helper.create_variable_for_type_inference(dtype=x.dtype)
+
+                helper.append_op(
+                    type="set_value",
+                    inputs=inputs,
+                    outputs={'Out': y},
+                    attrs={'axes': [0]},
+                )
+
+                return y, value
+
+            def set_value(array, i, op):
+                name_x = to_string('x', i)
+                x = paddle.static.data(
+                    name=name_x, shape=array.shape, dtype='float32'
+                )
+
+                # set_value_op in __get/setitem__ is an inplace operation.
+                # When `input.stop_gradient = True` and `value.stop_gradient = False`,
+                # set_value_grad_op will not be run during backward.
+                y, value = op(x)
+                y2 = y + 1
+                loss = paddle.sum(y2)
+                sgd = paddle.optimizer.Adam()
+                sgd.minimize(loss)
+                place = self.place
+
+                prog = paddle.static.default_main_program()
+                exe = paddle.static.Executor(place)
+                exe.run(paddle.static.default_startup_program())
+                fetch_list = []
+                if not x.stop_gradient:
+                    fetch_list.append(x.grad_name)
+                if not value.stop_gradient:
+                    fetch_list.append(value.grad_name)
+                out = exe.run(prog, feed={x.name: array}, fetch_list=fetch_list)
+                return out
+
+            input_shape = [7, 6, 5, 4, 3, 2]
+
+            array = np.arange(0, numel(input_shape), dtype="float32").reshape(
+                input_shape
+            )
+
+            for i in range(len(input_shape)):
+                program = paddle.static.Program()
+                with paddle.static.program_guard(program):
+                    out1 = set_value(array, i, op1)
+                    self.assertTrue((out1[0][5:0:-2] == 0).all())
+
+                if len(array.shape) > 2:
+                    program2 = paddle.static.Program()
+                    with paddle.static.program_guard(program2):
+                        out2 = set_value(array, i, op2)
+                        self.assertTrue((out2[0][6:0:-4] == 0).all())
+
+                    program3 = paddle.static.Program()
+                    with paddle.static.program_guard(program3):
+                        out3 = set_value(array, i, op3)
+                        self.assertTrue(
+                            (numel(out1[0][0:5:3].shape) == out3[0]).all()
+                        )
+
+                array = array[0]
+            paddle.disable_static()
+
+    class XPUTestSetValueInplace(XPUOpTest):
+        def setUp(self):
+            self.__class__.op_type = "set_value"
+            self.__class__.no_need_check_grad = True
+            self.place = paddle.XPUPlace(0)
+
+        def test_inplace(self):
+            paddle.disable_static()
+            with paddle.fluid.dygraph.guard():
+                paddle.seed(100)
+                a = paddle.rand(shape=[1, 4])
+                a.stop_gradient = False
+                b = a[:]
+                c = b
+                b[paddle.to_tensor(0)] = 1.0
+
+                self.assertTrue(id(b) == id(c))
+                np.testing.assert_array_equal(b.numpy(), c.numpy())
+                self.assertEqual(b.inplace_version, 0)
+
+            paddle.enable_static()
+
+    class XPUTestSetValueInplaceLeafVar(XPUOpTest):
+        def setUp(self):
+            self.__class__.op_type = "set_value"
+            self.__class__.no_need_check_grad = True
+            self.place = paddle.XPUPlace(0)
+
+        def test_inplace_var_become_leaf_var(self):
+            paddle.disable_static()
+
+            a_grad_1, b_grad_1, a_grad_2, b_grad_2 = 0, 1, 2, 3
+            with paddle.fluid.dygraph.guard():
+                paddle.seed(100)
+                a = paddle.rand(shape=[1, 4])
+                b = paddle.rand(shape=[1, 4])
+                a.stop_gradient = False
+                b.stop_gradient = False
+                c = a / b
+                c.sum().backward()
+                a_grad_1 = a.grad.numpy()
+                b_grad_1 = b.grad.numpy()
+
+            with paddle.fluid.dygraph.guard():
+                paddle.seed(100)
+                a = paddle.rand(shape=[1, 4])
+                b = paddle.rand(shape=[1, 4])
+                a.stop_gradient = False
+                b.stop_gradient = False
+                c = a / b
+                d = paddle.zeros((4, 4))
+                self.assertTrue(d.stop_gradient)
+                d[0, :] = c
+                self.assertFalse(d.stop_gradient)
+                d[0, :].sum().backward()
+                a_grad_2 = a.grad.numpy()
+                b_grad_2 = b.grad.numpy()
+
+            # Writing c into a slice of d must not change the gradients of a and b.
+            np.testing.assert_array_equal(a_grad_1, a_grad_2)
+            np.testing.assert_array_equal(b_grad_1, b_grad_2)
+            paddle.enable_static()
+
 
 support_types = get_xpu_op_support_types('set_value')
 for stype in support_types: