From a504508c6bfc0b164aa45f87d5d5a3a5590e08e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Wed, 11 Jan 2023 09:46:56 +0800 Subject: [PATCH] rm retain_grad_flag for tests part0 (#49655) * rm retain_grad_flag for tests * modify transpose op * retain grads for xpu tests * lint * modify xpu test --- .../test_autograd_functional_dynamic.py | 4 -- .../fluid/tests/unittests/test_cast_op.py | 2 - .../fluid/tests/unittests/test_detach.py | 4 +- .../unittests/test_elementwise_nn_grad.py | 2 - .../tests/unittests/test_expand_v2_op.py | 2 - .../test_imperative_recurrent_usage.py | 6 +-- .../fluid/tests/unittests/test_mean_op.py | 2 - .../fluid/tests/unittests/test_sign_op.py | 2 - .../fluid/tests/unittests/test_softmax_op.py | 3 +- .../fluid/tests/unittests/test_squeeze_op.py | 2 - .../unittests/test_tensor_register_hook.py | 32 +++++-------- .../tests/unittests/test_transpose_op.py | 4 +- .../test_uniform_random_inplace_op.py | 4 +- .../tests/unittests/test_unsqueeze_op.py | 2 - .../unittests/xpu/test_zero_dim_tensor_xpu.py | 48 ++++++++++++++++++- 15 files changed, 67 insertions(+), 52 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py index 9883a88041..fab6d84f9a 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py @@ -22,7 +22,6 @@ import utils from utils import matmul, mul, nested, o2, reduce, reduce_dim import paddle -import paddle.fluid as fluid import paddle.nn.functional as F from paddle.incubate.autograd.utils import as_tensors @@ -553,8 +552,6 @@ class TestHessianNoBatch(unittest.TestCase): ) def func_create_graph_true(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def func(x): return paddle.sum(F.sigmoid(x)) @@ -568,7 +565,6 @@ class TestHessianNoBatch(unittest.TestCase): np.testing.assert_allclose( hessian[:].numpy(), numerical_hessian, self.rtol, self.atol ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_out_not_single(self): def func(x): diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py index bb88d1cc53..ea7f24a488 100644 --- a/python/paddle/fluid/tests/unittests/test_cast_op.py +++ b/python/paddle/fluid/tests/unittests/test_cast_op.py @@ -150,7 +150,6 @@ class TestCastDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.cast_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -182,7 +181,6 @@ class TestCastTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.cast_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 806cc532da..1a01b7667f 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -106,6 +106,7 @@ class Test_Detach(unittest.TestCase): ) data = 
to_variable(data) x = linear(data) + x.retain_grads() x1 = linear1(x) loss = x1 # print(loss, loss.shape) @@ -153,6 +154,7 @@ class Test_Detach(unittest.TestCase): ) data = to_variable(data) x = linear(data) + x.retain_grads() x_detach = x.detach() x1 = linear1(x) x2 = linear2(x_detach) @@ -162,12 +164,10 @@ class Test_Detach(unittest.TestCase): return x.gradient() def test_NoDetachMulti_DetachMulti(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) array_no_detach_multi = self.no_detach_multi() array_detach_multi = self.detach_multi() assert not np.array_equal(array_no_detach_multi, array_detach_multi) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_NoDetachSingle_DetachMulti(self): array_no_detach_single = self.no_detach_single() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index 76bcdd9059..4be4ddd228 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -359,7 +359,6 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.multiply_wrapper, [x, y], @@ -367,7 +366,6 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase): x_init=[x_arr, y_arr], place=place, ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 84efb6c338..8839def692 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -281,7 +281,6 @@ class TestExpandDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.expand_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -313,7 +312,6 @@ class TestExpandTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.expand_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py index c184a9c6b0..e40cb63206 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py @@ -40,7 +40,6 @@ class TestRecurrentFeed(unittest.TestCase): original_np1 = np.arange(1, 5).reshape(2, 2).astype("float32") original_np2 = np.arange(5, 9).reshape(2, 2).astype("float32") with fluid.dygraph.guard(): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed original_in1 = to_variable(original_np1) @@ -51,16 +50,15 @@ class TestRecurrentFeed(unittest.TestCase): for i in range(3): sum_out, out = rt(original_in1, original_in2) + out.retain_grads() original_in1 = out 
sum_out_value = sum_out.numpy() sum_out.backward() dyout = out.gradient() original_in1.stop_gradient = True rt.clear_gradients() - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) with fluid.dygraph.guard(): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed original_in1 = to_variable(original_np1) @@ -71,13 +69,13 @@ class TestRecurrentFeed(unittest.TestCase): for i in range(3): sum_out, out = rt(original_in1, original_in2) + out.retain_grads() original_in1 = out eager_sum_out_value = sum_out.numpy() sum_out.backward() eager_dyout = out.gradient() original_in1.stop_gradient = True rt.clear_gradients() - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 33f95b439c..c6ade1ca53 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -450,7 +450,6 @@ class TestMeanDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.mean_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -482,7 +481,6 @@ class TestMeanTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.mean_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index 33e4ca795b..ca6ca55c91 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -110,7 +110,6 @@ class TestSignDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.sign_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -142,7 +141,6 @@ class TestSignTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.sign_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 4f689762b8..290d72b248 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -491,11 +491,12 @@ class TestSoftmaxAPI(unittest.TestCase): class TestSoftmaxAPI_ZeroDim(unittest.TestCase): def test_dygraph(self): paddle.disable_static() - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.nn.functional.softmax(x) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) self.assertEqual(x.grad.shape, []) diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py 
b/python/paddle/fluid/tests/unittests/test_squeeze_op.py index dd3fc5e3c2..b60152a514 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -236,7 +236,6 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.squeeze_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -268,7 +267,6 @@ class TestSqueezeTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.squeeze_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index 54dadeb9f8..bb1d05fe3e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -66,8 +66,6 @@ class TestTensorRegisterHook(unittest.TestCase): self.devices.append("gpu") def test_hook_for_interior_var(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_double_hook_for_interior_var(double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -79,6 +77,7 @@ class TestTensorRegisterHook(unittest.TestCase): w = x + y w.stop_gradient = False + w.retain_grads() helper = w.register_hook(double_hook) z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) @@ -115,6 +114,7 @@ class TestTensorRegisterHook(unittest.TestCase): w = x + y w.stop_gradient = False + w.retain_grads() helper = w.register_hook(print_hook) z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) @@ -156,11 +156,8 @@ class TestTensorRegisterHook(unittest.TestCase): run_print_hook_for_interior_var(print_hook) # register hook and removed run_print_hook_for_interior_var(print_hook, removed=True) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_hook_for_leaf_var(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_double_hook_for_leaf_var(double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -173,6 +170,7 @@ class TestTensorRegisterHook(unittest.TestCase): w = x + y w.stop_gradient = False + w.retain_grads() z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) z.stop_gradient = False @@ -198,11 +196,8 @@ class TestTensorRegisterHook(unittest.TestCase): run_double_hook_for_leaf_var(lambda grad: grad * 2) # register hook and removed run_double_hook_for_leaf_var(lambda grad: grad * 2, removed=True) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_hook_for_accumulated_grad_interior_var(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_double_hook_for_accumulated_grad_interior_var( double_hook, removed=False ): @@ -213,11 +208,14 @@ class TestTensorRegisterHook(unittest.TestCase): b = paddle.to_tensor([0.0, 0.0, 1.0, 2.0]) a.stop_gradient = False b.stop_gradient = False + a.retain_grads() + b.retain_grads() helper1 = a.register_hook(double_hook) x = a + b x.stop_gradient = False + x.retain_grads() helper2 = x.register_hook(double_hook) @@ -258,11 +256,8 @@ class TestTensorRegisterHook(unittest.TestCase): run_double_hook_for_accumulated_grad_interior_var( lambda grad: 
grad * 2, removed=True ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_hook_for_accumulated_grad_leaf_var(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_double_hook_for_accumulated_grad_leaf_var( double_hook, removed=False ): @@ -304,11 +299,8 @@ class TestTensorRegisterHook(unittest.TestCase): run_double_hook_for_accumulated_grad_leaf_var( lambda grad: grad * 2, removed=True ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_hook_in_model(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_double_hook_in_model( data, label, hook=None, register=False, remove=False ): @@ -321,8 +313,10 @@ class TestTensorRegisterHook(unittest.TestCase): data = paddle.to_tensor(data) label = paddle.to_tensor(label) + data.retain_grads() ret1, out = net(data, hook, register, remove) + ret1.retain_grads() loss = loss_fn(out, label) loss.backward() @@ -357,7 +351,7 @@ class TestTensorRegisterHook(unittest.TestCase): ) # compare original value and with hook - np.testing.assert_array_equal(ret1_grad, ret1_grad_hook) + np.testing.assert_array_equal(ret1_grad * 2, ret1_grad_hook) np.testing.assert_array_equal(linear1_w_grad * 2, linear1_w_grad_hook) np.testing.assert_array_equal(linear1_b_grad * 2, linear1_b_grad_hook) @@ -365,11 +359,8 @@ class TestTensorRegisterHook(unittest.TestCase): np.testing.assert_array_equal(ret1_grad, ret1_grad_rm) np.testing.assert_array_equal(linear1_w_grad, linear1_w_grad_rm) np.testing.assert_array_equal(linear1_b_grad, linear1_b_grad_rm) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_multiple_hooks_for_interior_var(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def run_multiple_hooks_for_interior_var( device, hooks, remove1=False, remove2=False, remove3=False ): @@ -380,7 +371,11 @@ class TestTensorRegisterHook(unittest.TestCase): x.stop_gradient = False y.stop_gradient = False + x.retain_grads() + y.retain_grads() + w = x + y + w.retain_grads() w.stop_gradient = False helpers = [] @@ -449,7 +444,6 @@ class TestTensorRegisterHook(unittest.TestCase): np.testing.assert_array_equal(w_grad, z) np.testing.assert_array_equal(x_grad, z) np.testing.assert_array_equal(y_grad, z) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_hook_in_double_grad(self): def double_print_hook(grad): diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index ee56a2aad4..ad57289ffd 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -528,7 +528,6 @@ class TestTransposeDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.transpose_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -560,7 +559,6 @@ class TestTransposeTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.transpose_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -577,11 +575,11 @@ class TestTransposeTripleGradCheck(unittest.TestCase): class TestTransposeAPI_ZeroDim(unittest.TestCase): def test_dygraph(self): 
paddle.disable_static() - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) x = paddle.rand([]) x.stop_gradient = False out = paddle.transpose(x, []) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py index f420209dda..43b1e117dc 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py @@ -161,12 +161,11 @@ class TestUniformRandomInplaceGrad(unittest.TestCase): self.shape = (1000, 784) def run_(self): - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - def test_grad(): tensor_a = paddle.ones(self.shape) tensor_a.stop_gradient = False tensor_b = tensor_a * 0.5 + tensor_b.retain_grads() tensor_b.uniform_(min=-2, max=2) loss = tensor_b.sum() loss.backward() @@ -179,7 +178,6 @@ class TestUniformRandomInplaceGrad(unittest.TestCase): for place in places: paddle.set_device(place) test_grad() - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_uniform_random_inplace_grad(self): self.run_() diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py index 8bfac13d9a..44fd0888f6 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -337,7 +337,6 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.unsqueeze_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -369,7 +368,6 @@ class TestUnsqueezeTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.unsqueeze_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index 7ec32d4bf0..71dd40a6c3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -17,12 +17,10 @@ import unittest import numpy as np import paddle -import paddle.fluid as fluid import paddle.nn.functional as F paddle.set_device('xpu') -fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) unary_api_list = [ paddle.nn.functional.elu, @@ -102,7 +100,9 @@ class TestUnaryAPI(unittest.TestCase): for api in unary_api_list: x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = api(x) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -149,7 +149,9 @@ class TestReduceAPI(unittest.TestCase): else: x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = api(x, None) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -199,12 +201,15 @@ class TestBinaryAPI(unittest.TestCase): y = paddle.rand([]) x.stop_gradient = False y.stop_gradient = False + x.retain_grads() + y.retain_grads() if isinstance(api, dict): out = api['func'](x, y) out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y) 
np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() self.assertEqual(out.shape, []) out.backward() @@ -224,6 +229,7 @@ class TestBinaryAPI(unittest.TestCase): np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() self.assertEqual(out.shape, [2, 3, 4]) out.backward() @@ -243,6 +249,7 @@ class TestBinaryAPI(unittest.TestCase): np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() self.assertEqual(out.shape, [2, 3, 4]) out.backward() @@ -367,7 +374,9 @@ class TestSundryAPI(unittest.TestCase): def test_pow_factor(self): x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.pow(x, 2.0) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -377,7 +386,9 @@ class TestSundryAPI(unittest.TestCase): def test_cast(self): x = paddle.full([], 1.0, 'float32') x.stop_gradient = False + x.retain_grads() out = paddle.cast(x, 'int32') + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -388,6 +399,7 @@ class TestSundryAPI(unittest.TestCase): x = paddle.uniform([], None, -10, 10) x.stop_gradient = False out = paddle.clip(x, -5, 5) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -432,6 +444,7 @@ class TestSundryAPI(unittest.TestCase): x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) index = paddle.full([], 2, 'int64') out = paddle.gather(x, index) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -444,6 +457,7 @@ class TestSundryAPI(unittest.TestCase): ) index = paddle.full([], 1, 'int64') out = paddle.gather(x, index) + out.retain_grads() out.backward() self.assertEqual(out.shape, [3]) @@ -486,10 +500,18 @@ class TestSundryAPI(unittest.TestCase): x2.stop_gradient = False x3.stop_gradient = False + x1.retain_grads() + x2.retain_grads() + x3.retain_grads() + out1 = paddle.diagflat(x1, 1) out2 = paddle.diagflat(x2, -1) out3 = paddle.diagflat(x3, 0) + out1.retain_grads() + out2.retain_grads() + out3.retain_grads() + out1.backward() out2.backward() out3.backward() @@ -539,7 +561,9 @@ class TestSundryAPI(unittest.TestCase): def test_scale(self): x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.scale(x, scale=2.0, bias=1.0) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -574,26 +598,31 @@ class TestSundryAPI(unittest.TestCase): def test_reshape_list(self): x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, []) self.assertEqual(out.grad.shape, []) out = paddle.reshape(x, [1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1]) self.assertEqual(out.grad.shape, [1]) out = paddle.reshape(x, [-1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1]) self.assertEqual(out.grad.shape, [1]) out = paddle.reshape(x, [-1, 1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1, 1]) @@ -602,8 +631,10 @@ class TestSundryAPI(unittest.TestCase): def test_reshape_tensor(self): x = paddle.rand([1, 1]) x.stop_gradient = False + x.retain_grads() out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, []) @@ -611,6 +642,7 @@ class 
TestSundryAPI(unittest.TestCase): new_shape = paddle.full([], 1, "int32") out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1]) @@ -618,6 +650,7 @@ class TestSundryAPI(unittest.TestCase): new_shape = paddle.full([], -1, "int32") out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1]) @@ -625,6 +658,7 @@ class TestSundryAPI(unittest.TestCase): new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")] out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1, 1]) @@ -666,8 +700,13 @@ class TestSundryAPI(unittest.TestCase): x2 = paddle.rand([]) x1.stop_gradient = False x2.stop_gradient = False + x1.retain_grads() + x2.retain_grads() + out1 = paddle.sort(x1, axis=-1) out2 = paddle.sort(x2, axis=0) + out1.retain_grads() + out2.retain_grads() out1.backward() out2.backward() @@ -688,8 +727,13 @@ class TestSundryAPI(unittest.TestCase): x2 = paddle.rand([]) x1.stop_gradient = False x2.stop_gradient = False + x1.retain_grads() + x2.retain_grads() + out1 = paddle.argsort(x1, axis=-1) out2 = paddle.argsort(x2, axis=0) + out1.retain_grads() + out2.retain_grads() out1.backward() out2.backward() -- GitLab
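
Note (not part of the patch): every hunk above follows the same migration pattern. Instead of globally enabling FLAGS_retain_grad_for_all_tensor through fluid.set_flags before a test and disabling it afterwards, each test now calls retain_grads() only on the specific non-leaf tensors whose .grad it actually inspects. A minimal illustrative sketch of that pattern follows; the tensor names, shapes, and the assert checks are made up for illustration and do not come from the patch.

import paddle

# Old pattern (removed throughout this patch):
#   fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
#   ... run the test body ...
#   fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

x = paddle.rand([2, 3])
x.stop_gradient = False

y = x * 2.0          # non-leaf tensor; its gradient is discarded by default
y.retain_grads()     # new pattern: keep this tensor's gradient explicitly

loss = y.sum()
loss.backward()

assert y.grad is not None        # available only because of retain_grads()
assert list(y.grad.shape) == [2, 3]

The per-tensor call avoids retaining gradients for every intermediate tensor in the program, and it removes the need to reset a process-wide flag after each test.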