Unverified Commit e2474595 authored by Leo Guo, committed by GitHub

[XPU] Add unittest for set_value_grad. (#50049)

Parent 754ab705
@@ -16,12 +16,10 @@
import sys
import unittest
from functools import reduce
import numpy as np
# from functools import reduce
sys.path.append("../")
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
@@ -31,8 +29,7 @@ from xpu.get_test_cover_info import (
)
import paddle
# from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layer_helper import LayerHelper
class XPUTestSetValueOp(XPUOpTestWrapper):
@@ -927,6 +924,562 @@ class XPUTestSetValueOp(XPUOpTestWrapper):
self._bool_tensor_error()
self._broadcast_mismatch()
# 5. Test backward
class XPUTestBackward(XPUOpTest):
def setUp(self):
self.__class__.op_type = "set_value"
self.__class__.no_need_check_grad = True
self.place = paddle.XPUPlace(0)
def test_static(self):
paddle.enable_static()
main_program = paddle.static.Program()
startup_program = paddle.static.Program()
x_np = np.random.random(size=(4, 4)).astype('float32')
y_np = np.random.random(size=(4, 4)).astype('float32')
label_np = np.random.randint(2, size=(4, 1)).astype('int64')
with paddle.static.program_guard(main_program, startup_program):
x = paddle.static.data(name="x", shape=[4, 4], dtype='float32')
y = paddle.static.data(name="y", shape=[4, 4], dtype='float32')
label = paddle.static.data(
name="label", shape=[4, 1], dtype='int64'
)
z = paddle.add(x, y)
var = y[0, :]
z[0, :] = var
prediction = paddle.static.nn.fc(
x=z, size=2, activation='softmax'
)
cost = paddle.nn.functional.cross_entropy(
input=prediction, label=label
)
loss = paddle.mean(cost)
sgd = paddle.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
exe = paddle.static.Executor(self.place)
exe.run(startup_program)
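# Because `z[0, :] = var` routes the first row of z through var, the
# gradient arriving at that row of z should flow to var unchanged,
# which is what fetching the two @GRAD variables below verifies.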
var_grad, z_grad = exe.run(
main_program,
feed={"x": x_np, "y": y_np, "label": label_np},
fetch_list=[var.name + "@GRAD", z.name + "@GRAD"],
)
self.assertTrue((var_grad == z_grad[0, :]).all())
paddle.disable_static()
class XPUTestGradientTruncated(XPUOpTest):
def setUp(self):
self.__class__.op_type = "set_value"
self.__class__.no_need_check_grad = True
self.place = paddle.XPUPlace(0)
def test_consistent_with_competitor(self):
paddle.disable_static()
def set_value(t, value):
a = t * t
a[0, 1] = value
y = a * a
return y.sum()
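# The expected gradients below can be derived by hand: with a = t * t
# and loss = (a * a).sum(), an untouched entry of t receives
# d(loss)/dt = 4 * t**3, an entry overwritten by `value` receives zero
# gradient, and each element v of `value` receives 2 * v for every
# position it is broadcast into.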
# case 1
array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
[1, 2, 1, 3, 1, 4]
)
value = np.arange(100, 104, dtype="float32").reshape(1, 4)
inps = paddle.to_tensor(array, stop_gradient=False)
value = paddle.to_tensor(value, stop_gradient=False)
loss = set_value(inps, value)
loss.backward()
value_grad = np.array([[600.0, 606.0, 612.0, 618.0]])
input_grad = np.array(
[
[
[
[
[[4.0, 32.0, 108.0, 256.0]],
[[500.0, 864.0, 1372.0, 2048.0]],
[[2916.0, 4000.0, 5324.0, 6912.0]],
]
],
[
[
[[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0]],
]
],
]
]
)
np.testing.assert_array_equal(
inps.grad.numpy(),
input_grad,
err_msg='The gradient of input should be \n{},\n but received {}'.format(
input_grad, inps.grad.numpy()
),
)
np.testing.assert_array_equal(
value.grad.numpy(),
value_grad,
err_msg='The gradient of value should be \n{},\n but received {}'.format(
value_grad, value.grad.numpy()
),
)
# case 2
array = np.arange(1, 2 * 3 * 4 + 1, dtype="float32").reshape(
[4, 2, 3]
)
value = np.arange(100, 100 + 1, dtype="float32")
inps2 = paddle.to_tensor(array, stop_gradient=False)
value2 = paddle.to_tensor(value, stop_gradient=False)
loss = set_value(inps2, value2)
loss.backward()
value_grad2 = np.array([600.0])
input_grad2 = np.array(
[
[[4.0, 32.0, 108.0], [0.0, 0.0, 0.0]],
[[1372.0, 2048.0, 2916.0], [4000.0, 5324.0, 6912.0]],
[[8788.0, 10976.0, 13500.0], [16384.0, 19652.0, 23328.0]],
[[27436.0, 32000.0, 37044.0], [42592.0, 48668.0, 55296.0]],
]
)
np.testing.assert_array_equal(
inps2.grad.numpy(),
input_grad2,
err_msg='The gradient of input should be \n{},\n but received {}'.format(
input_grad2, inps2.grad.numpy()
),
)
np.testing.assert_array_equal(
value2.grad.numpy(),
value_grad2,
err_msg='The gradient of value should be \n{},\n but received {}'.format(
value_grad2, value2.grad.numpy()
),
)
# case 3
def set_value3(t, value):
a = t * t
a[0, :, 0, :] = value
y = a * a
return y.sum()
array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
[4, 3, 1, 1, 2, 1]
)
value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1)
inps = paddle.to_tensor(array, stop_gradient=False)
value = paddle.to_tensor(value, stop_gradient=False)
loss = set_value3(inps, value)
loss.backward()
value_grad = np.array([[[600.0], [606.0]]])
input_grad = np.array(
[
[
[[[[0.0], [0.0]]]],
[[[[0.0], [0.0]]]],
[[[[0.0], [0.0]]]],
],
[
[[[[1372.0], [2048.0]]]],
[[[[2916.0], [4000.0]]]],
[[[[5324.0], [6912.0]]]],
],
[
[[[[8788.0], [10976.0]]]],
[[[[13500.0], [16384.0]]]],
[[[[19652.0], [23328.0]]]],
],
[
[[[[27436.0], [32000.0]]]],
[[[[37044.0], [42592.0]]]],
[[[[48668.0], [55296.0]]]],
],
]
)
np.testing.assert_array_equal(
inps.grad.numpy(),
input_grad,
err_msg='The gradient of input should be \n{},\n but received {}'.format(
input_grad, inps.grad.numpy()
),
)
np.testing.assert_array_equal(
value.grad.numpy(),
value_grad,
err_msg='The gradient of value should be \n{},\n but received {}'.format(
value_grad, value.grad.numpy()
),
)
# case 4: step > 0
def set_value4(t, value):
a = t * t
a[0, :, 0, ::3] = value
y = a * a
return y.sum()
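# With step 3 on an axis of length 4, a[0, :, 0, ::3] touches only
# indices 0 and 3, which is why the zeros in `input_grad` below sit at
# the first and last positions of that axis.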
array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
[2, 3, 1, 4, 1]
)
value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1)
inps = paddle.to_tensor(array, stop_gradient=False)
value = paddle.to_tensor(value, stop_gradient=False)
loss = set_value4(inps, value)
loss.backward()
value_grad = np.array([[[600.0], [606.0]]])
input_grad = np.array(
[
[
[[[0.0], [32.0], [108.0], [0.0]]],
[[[0.0], [864.0], [1372.0], [0.0]]],
[[[0.0], [4000.0], [5324.0], [0.0]]],
],
[
[[[8788.0], [10976.0], [13500.0], [16384.0]]],
[[[19652.0], [23328.0], [27436.0], [32000.0]]],
[[[37044.0], [42592.0], [48668.0], [55296.0]]],
],
]
)
np.testing.assert_array_equal(
inps.grad.numpy(),
input_grad,
err_msg='The gradient of input should be \n{},\n but received {}'.format(
input_grad, inps.grad.numpy()
),
)
np.testing.assert_array_equal(
value.grad.numpy(),
value_grad,
err_msg='The gradient of value should be \n{},\n but received {}'.format(
value_grad, value.grad.numpy()
),
)
# case 5: a[0].shape == value.shape
def set_value5(t, value):
a = t * t
a[0] = value
y = a * a
return y.sum()
array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape(
[2, 3, 4]
)
value = np.arange(100, 100 + 12, dtype="float32").reshape(3, 4)
inps = paddle.to_tensor(array, stop_gradient=False)
value = paddle.to_tensor(value, stop_gradient=False)
loss = set_value5(inps, value)
loss.backward()
value_grad = np.array(
[
[200.0, 202.0, 204.0, 206.0],
[208.0, 210.0, 212.0, 214.0],
[216.0, 218.0, 220.0, 222.0],
]
)
input_grad = np.array(
[
[
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
],
[
[8788.0, 10976.0, 13500.0, 16384.0],
[19652.0, 23328.0, 27436.0, 32000.0],
[37044.0, 42592.0, 48668.0, 55296.0],
],
]
)
np.testing.assert_array_equal(
inps.grad.numpy(),
input_grad,
err_msg='The gradient of input should be \n{},\n but received {}'.format(
input_grad, inps.grad.numpy()
),
)
np.testing.assert_array_equal(
value.grad.numpy(),
value_grad,
err_msg='The gradient of value should be \n{},\n but received {}'.format(
value_grad, value.grad.numpy()
),
)
# case 6: pass stop_gradient from value to x
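# Assigning a gradient-requiring `value` into a constant tensor pulls
# the target into the autograd graph: it stops being a leaf and its
# stop_gradient flag flips to False.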
x = paddle.zeros([8, 8], dtype='float32')
value = paddle.to_tensor([10], dtype='float32', stop_gradient=False)
self.assertTrue(x.stop_gradient)
self.assertTrue(x.is_leaf)
x[0, :] = value
self.assertTrue(not x.stop_gradient)
self.assertTrue(not x.is_leaf)
def test_static_graph(self):
paddle.enable_static()
to_string = lambda x, i: x + '_' + str(i)
numel = lambda input_shape: reduce(lambda x, y: x * y, input_shape)
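# op1, op2 and op3 below cover the three stop_gradient combinations:
# op1 stops the gradient of `value`, op2 lets both `x` and `value`
# require gradients, and op3 stops the gradient of `x`. Only the
# inputs that require gradients are fetched and checked.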
def op1(x):
value = paddle.fluid.layers.fill_constant([1], "float32", 1)
# test stop_gradient
value.stop_gradient = True
x.stop_gradient = False
start = paddle.fluid.layers.fill_constant(
[1], "int32", 5, force_cpu=True
)
end = paddle.fluid.layers.fill_constant(
[1], "int32", 0, force_cpu=True
)
step = paddle.fluid.layers.fill_constant(
[1], "int32", -2, force_cpu=True
)
inputs = {
'Input': x,
'ValueTensor': value,
'StartsTensorList': [
start,
],
'EndsTensorList': [
end,
],
'StepsTensorList': [
step,
],
}
helper = LayerHelper("set_value")
y = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="set_value",
inputs=inputs,
outputs={'Out': y},
attrs={'axes': [0]},
)
return y, value
def op2(x):
value = paddle.fluid.layers.fill_constant(
[1, 3, 2], "float32", 1
)
# test stop_gradient
value.stop_gradient = False
x.stop_gradient = False
attrs = {
'axes': [0],
'starts': [6],
'ends': [0],
'steps': [-4],
'decrease_axes': [],
'none_axes': [],
'dtype': paddle.float32,
}
inputs = {'Input': x, 'ValueTensor': value}
helper = LayerHelper("set_value")
y = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="set_value",
inputs=inputs,
outputs={'Out': y},
attrs=attrs,
)
return y, value
def op3(x):
value = paddle.fluid.layers.fill_constant([1], "float32", 1)
x.stop_gradient = True
value.stop_gradient = False
start = paddle.fluid.layers.fill_constant(
[1], "int32", 0, force_cpu=True
)
end = paddle.fluid.layers.fill_constant(
[1], "int32", 5, force_cpu=True
)
step = paddle.fluid.layers.fill_constant(
[1], "int32", 3, force_cpu=True
)
inputs = {
'Input': x,
'ValueTensor': value,
'StartsTensorList': [
start,
],
'EndsTensorList': [
end,
],
'StepsTensorList': [
step,
],
}
helper = LayerHelper("set_value")
y = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="set_value",
inputs=inputs,
outputs={'Out': y},
attrs={'axes': [0]},
)
return y, value
def set_value(array, i, op):
name_x = to_string('x', i)
x = paddle.static.data(
name=name_x, shape=array.shape, dtype='float32'
)
# The set_value op used by __getitem__/__setitem__ is an in-place operation.
# When `input.stop_gradient = True` and `value.stop_gradient = False`,
# set_value_grad_op will not be run during backward.
y, value = op(x)
y2 = y + 1
loss = paddle.sum(y2)
opt = paddle.optimizer.Adam()
opt.minimize(loss)
place = self.place
prog = paddle.static.default_main_program()
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
fetch_list = []
if not x.stop_gradient:
fetch_list.append(x.grad_name)
if not value.stop_gradient:
fetch_list.append(value.grad_name)
out = exe.run(prog, feed={x.name: array}, fetch_list=fetch_list)
return out
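# Since loss = sum(y + 1), d(loss)/dy is all ones: x@GRAD is zero at
# every position overwritten by `value`, and value@GRAD equals the
# number of positions `value` was broadcast into.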
input_shape = [7, 6, 5, 4, 3, 2]
array = np.arange(0, numel(input_shape), dtype="float32").reshape(
input_shape
)
for i in range(len(input_shape)):
program = paddle.static.Program()
with paddle.static.program_guard(program):
out1 = set_value(array, i, op1)
self.assertTrue((out1[0][5:0:-2] == 0).all())
if len(array.shape) > 2:
program2 = paddle.static.Program()
with paddle.static.program_guard(program2):
out2 = set_value(array, i, op2)
self.assertTrue((out2[0][6:0:-4] == 0).all())
program3 = paddle.static.Program()
with paddle.static.program_guard(program3):
out3 = set_value(array, i, op3)
self.assertTrue(
(numel(out1[0][0:5:3].shape) == out3[0]).all()
)
array = array[0]
paddle.disable_static()
class XPUTestSetValueInplace(XPUOpTest):
def setUp(self):
self.__class__.op_type = "set_value"
self.__class__.no_need_check_grad = True
self.place = paddle.XPUPlace(0)
def test_inplace(self):
paddle.disable_static()
with paddle.fluid.dygraph.guard():
paddle.seed(100)
a = paddle.rand(shape=[1, 4])
a.stop_gradient = False
b = a[:]
c = b
b[paddle.to_tensor(0)] = 1.0
self.assertTrue(id(b) == id(c))
np.testing.assert_array_equal(b.numpy(), c.numpy())
self.assertEqual(b.inplace_version, 0)
paddle.enable_static()
class XPUTestSetValueInplaceLeafVar(XPUOpTest):
def setUp(self):
self.__class__.op_type = "set_value"
self.__class__.no_need_check_grad = True
self.place = paddle.XPUPlace(0)
def test_inplace_var_become_leaf_var(self):
paddle.disable_static()
a_grad_1, b_grad_1, a_grad_2, b_grad_2 = 0, 1, 2, 3
with paddle.fluid.dygraph.guard():
paddle.seed(100)
a = paddle.rand(shape=[1, 4])
b = paddle.rand(shape=[1, 4])
a.stop_gradient = False
b.stop_gradient = False
c = a / b
c.sum().backward()
a_grad_1 = a.grad.numpy()
b_grad_1 = b.grad.numpy()
with paddle.fluid.dygraph.guard():
paddle.seed(100)
a = paddle.rand(shape=[1, 4])
b = paddle.rand(shape=[1, 4])
a.stop_gradient = False
b.stop_gradient = False
c = a / b
d = paddle.zeros((4, 4))
self.assertTrue(d.stop_gradient)
d[0, :] = c
self.assertFalse(d.stop_gradient)
d[0, :].sum().backward()
a_grad_2 = a.grad.numpy()
b_grad_2 = b.grad.numpy()
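# Routing c through d[0, :] must leave the gradients of a and b
# unchanged, because the rest of d is constant zeros outside the
# autograd graph.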
np.testing.assert_array_equal(a_grad_1, a_grad_2)
np.testing.assert_array_equal(b_grad_1, b_grad_2)
paddle.enable_static()
support_types = get_xpu_op_support_types('set_value')
for stype in support_types:
......