From 4d3b7d7d587deecea68c0add417a03eec5f9e61d Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Thu, 2 Jun 2022 21:40:16 +0800 Subject: [PATCH] [Eager] FLAGS_retain_grad_for_all_tensor set false in default (#43142) * [Eager] FLAGS_retain_grad set false * Add FLAGS_retain_grad_ for some tests * Add FLAGS_retain_grad_ to some tests * modified set_flags * modified set_flags * fix windows-ci and windows-openblas-ci * import paddle.fluid --- paddle/fluid/eager/utils.cc | 2 +- .../custom_op/test_custom_tanh_double_grad.py | 5 +++++ .../autograd/test_autograd_functional_dynamic.py | 13 +++++++++++++ .../tests/unittests/test_activation_nn_grad.py | 16 ++++++++++++++++ .../fluid/tests/unittests/test_assign_op.py | 14 ++++++++++++++ .../paddle/fluid/tests/unittests/test_detach.py | 2 ++ .../tests/unittests/test_elementwise_nn_grad.py | 2 ++ .../tests/unittests/test_elementwise_pow_op.py | 2 ++ .../unittests/test_imperative_auto_prune.py | 6 ++++++ .../tests/unittests/test_imperative_basic.py | 2 ++ .../unittests/test_imperative_recurrent_usage.py | 4 ++++ .../unittests/test_imperative_selected_rows.py | 4 ++++ .../unittests/test_imperative_triple_grad.py | 4 ++++ .../unittests/test_op_function_generator.py | 2 ++ .../fluid/tests/unittests/test_set_value_op.py | 3 +++ .../fluid/tests/unittests/test_sparse_norm_op.py | 3 +++ .../tests/unittests/test_sparse_unary_op.py | 3 +++ .../tests/unittests/test_sparse_utils_op.py | 5 +++++ .../fluid/tests/unittests/test_tensor_fill_.py | 2 ++ .../unittests/test_tensor_fill_diagonal_.py | 10 ++++++++++ .../test_tensor_fill_diagonal_tensor.py | 8 ++++++++ .../test_tensor_fill_diagonal_tensor_.py | 10 ++++++++++ .../tests/unittests/test_tensor_register_hook.py | 10 ++++++++++ .../unittests/test_uniform_random_inplace_op.py | 3 +++ 24 files changed, 134 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 7d9554c52eb..4d7d1aa2d8a 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -27,7 +27,7 @@ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/variable.h" -PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, true, +PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, false, "retain grad for all tensor"); namespace egr { diff --git a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py index 3b3a0e2edec..0fa07abe9d0 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py @@ -17,6 +17,7 @@ import unittest import numpy as np import paddle +import paddle.fluid as fluid import paddle.static as static from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension.extension_utils import run_cmd @@ -40,6 +41,7 @@ custom_ops = load( def custom_tanh_double_grad_dynamic(func, device, dtype, np_x): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) paddle.set_device(device) t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False) @@ -55,6 +57,7 @@ def custom_tanh_double_grad_dynamic(func, device, dtype, np_x): assert out.grad is not None assert dx[0].grad is not None return dx[0].numpy(), dx[0].grad.numpy(), out.grad.numpy() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) class TestCustomTanhDoubleGradJit(unittest.TestCase): @@ -85,9 +88,11 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase): dout, pd_dout)) def 
test_func_double_grad_dynamic(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_double_grad_dynamic() self.func_double_grad_dynamic() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py index 40aead90765..a4c09cd661a 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle +import paddle.fluid as fluid import paddle.compat as cpt import paddle.nn.functional as F from paddle.autograd.functional import _as_tensors @@ -490,6 +491,8 @@ class TestHessianClassNoBatch(unittest.TestCase): self.rtol, self.atol) def func_create_graph_true(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + def func(x): return paddle.sum(F.sigmoid(x)) @@ -501,6 +504,7 @@ class TestHessianClassNoBatch(unittest.TestCase): assert hessian[:].stop_gradient == False np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian, self.rtol, self.atol) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_out_not_single(self): def func(x): @@ -733,6 +737,8 @@ class TestHessian(unittest.TestCase): "does not appear") > 0 def func_create_graph_true(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + def func(x): return paddle.sum(F.sigmoid(x)) @@ -745,6 +751,7 @@ class TestHessian(unittest.TestCase): self.rtol, self.atol) triple_grad = paddle.grad(hessian, self.x) assert triple_grad is not None + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_all_cases(self): with _test_eager_guard(): @@ -1018,6 +1025,8 @@ class TestVHP(unittest.TestCase): self.atol) def func_create_graph_true(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + def func(x): return paddle.sum(F.sigmoid(x)) @@ -1034,6 +1043,7 @@ class TestVHP(unittest.TestCase): self.atol) triple_grad = paddle.grad(vhp, self.x) assert triple_grad is not None + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_all_cases(self): with _test_eager_guard(): @@ -1102,6 +1112,8 @@ class TestJacobian(unittest.TestCase): self.atol) def func_multi_input_and_multi_output(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + def func(x, y): return paddle.matmul(x, y), x * y @@ -1115,6 +1127,7 @@ class TestJacobian(unittest.TestCase): np.testing.assert_allclose(jacobian[i][j].numpy(), numerical_jacobian[i][j], self.rtol, self.atol) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_allow_unused_false(self): def func(x, y): diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index 919ae524471..41e1f98e5f6 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -67,8 +67,10 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase): x_arr[np.abs(x_arr) < 0.005] = 0.002 gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place) + 
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -95,8 +97,10 @@ class TestTanhTripleGradCheck(unittest.TestCase): x_arr[np.abs(x_arr) < 0.005] = 0.002 gradient_checker.triple_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.tanh_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -123,8 +127,10 @@ class TestTanhDoubleGradCheck(unittest.TestCase): x_arr[np.abs(x_arr) < 0.005] = 0.002 gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.tanh_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -151,8 +157,10 @@ class TestAbsDoubleGradCheck(unittest.TestCase): x_arr[np.abs(x_arr) < 0.005] = 0.002 gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.abs_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -240,8 +248,10 @@ class TestELUDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.elu_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -272,8 +282,10 @@ class TestCELUDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.celu_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -362,8 +374,10 @@ class TestSquareDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.square_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() @@ -421,8 +435,10 @@ class TestLogDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.log_wrapper, [x], y, x_init=x_arr, place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py index bfe23c62127..e1fae5d5aa5 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_op.py 
@@ -34,10 +34,14 @@ class TestAssignOp(op_test.OpTest): self.outputs = {'Out': x} def test_forward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.check_output(check_eager=True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_backward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.check_grad(['X'], 'Out', check_eager=True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) class TestAssignFP16Op(op_test.OpTest): @@ -49,14 +53,19 @@ class TestAssignFP16Op(op_test.OpTest): self.outputs = {'Out': x} def test_forward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.check_output(check_eager=True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_backward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.check_grad(['X'], 'Out', check_eager=True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) class TestAssignOpWithLoDTensorArray(unittest.TestCase): def test_assign_LoDTensorArray(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) main_program = Program() startup_program = Program() with program_guard(main_program): @@ -71,6 +80,7 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase): sums = fluid.layers.array_read(array=init_array, i=i) mean = fluid.layers.mean(sums) append_backward(mean) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( ) else fluid.CPUPlace() @@ -173,6 +183,7 @@ class TestAssignOApi(unittest.TestCase): def test_clone(self): paddle.disable_static() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.python_api = paddle.clone x = paddle.ones([2]) @@ -185,6 +196,7 @@ class TestAssignOApi(unittest.TestCase): self.assertTrue(np.array_equal(x, [1, 1]), True) self.assertTrue(np.array_equal(clone_x.grad.numpy(), [3, 3]), True) self.assertTrue(np.array_equal(x.grad.numpy(), [3, 3]), True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) paddle.enable_static() with program_guard(Program(), Program()): @@ -201,6 +213,7 @@ class TestAssignOApi(unittest.TestCase): class TestAssignOpErrorApi(unittest.TestCase): def test_errors(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with program_guard(Program(), Program()): # The type of input must be Variable or numpy.ndarray. x1 = fluid.create_lod_tensor( @@ -209,6 +222,7 @@ class TestAssignOpErrorApi(unittest.TestCase): # When the type of input is numpy.ndarray, the dtype of input must be float32, int32. 
x2 = np.array([[2.5, 2.5]], dtype='uint8') self.assertRaises(TypeError, paddle.assign, x2) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_type_error(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 8d19a1d3f65..8cf88027e37 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -139,10 +139,12 @@ class Test_Detach(unittest.TestCase): return x.gradient() def test_NoDetachMulti_DetachMulti(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) array_no_detach_multi = self.no_detach_multi() array_detach_multi = self.detach_multi() assert not np.array_equal(array_no_detach_multi, array_detach_multi) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_NoDetachSingle_DetachMulti(self): array_no_detach_single = self.no_detach_single() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index ccfed61185f..36e5d4d8e09 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -346,11 +346,13 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase): gradient_checker.triple_grad_check( [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.multiply_wrapper, [x, y], out, x_init=[x_arr, y_arr], place=place) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_grad(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py index 3c9e350360d..79945a10c80 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py @@ -185,6 +185,7 @@ class TestElementwisePowGradOpInt(unittest.TestCase): print(self.grad_res, self.grad_x, self.grad_y) def test_grad(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) places = [fluid.CPUPlace()] if fluid.is_compiled_with_cuda(): places.append(fluid.CUDAPlace(0)) @@ -200,6 +201,7 @@ class TestElementwisePowGradOpInt(unittest.TestCase): self.assertTrue(np.array_equal(res.gradient(), self.grad_res)) self.assertTrue(np.array_equal(x.gradient(), self.grad_x)) self.assertTrue(np.array_equal(y.gradient(), self.grad_y)) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 39b79dd4ba2..ffc5baf5cd2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -195,9 +195,11 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune3(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_auto_prune3() self.func_auto_prune3() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_auto_prune4(self): with fluid.dygraph.guard(): @@ -212,9 +214,11 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue((part2.gradient() == 
1).all()) def test_auto_prune4(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_auto_prune4() self.func_auto_prune4() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_auto_prune5(self): with fluid.dygraph.guard(): @@ -229,9 +233,11 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune5(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_auto_prune5() self.func_auto_prune5() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_auto_prune6(self): with fluid.dygraph.guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index ebbf681f3dc..c129f0756cc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -479,9 +479,11 @@ class TestImperative(unittest.TestCase): self.assertTrue(np.array_equal(dy_grad2, static_grad)) def test_layer_in_out(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_layer_in_out() self.func_layer_in_out() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_mlp(self): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py index f12ca0a93ff..0579d727dd2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py @@ -45,6 +45,7 @@ class TestRecurrentFeed(unittest.TestCase): original_np1 = np.arange(1, 5).reshape(2, 2).astype("float32") original_np2 = np.arange(5, 9).reshape(2, 2).astype("float32") with fluid.dygraph.guard(): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed original_in1 = to_variable(original_np1) @@ -61,8 +62,10 @@ class TestRecurrentFeed(unittest.TestCase): dyout = out.gradient() original_in1.stop_gradient = True rt.clear_gradients() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) with fluid.dygraph.guard(): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -80,6 +83,7 @@ class TestRecurrentFeed(unittest.TestCase): eager_dyout = out.gradient() original_in1.stop_gradient = True rt.clear_gradients() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py index 8bb4088dc3b..05f00cba0c2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py @@ -79,9 +79,11 @@ class TestSimpleNet(unittest.TestCase): paddle.enable_static() def test_selectedrows_gradient1(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_selectedrows_gradient1() self.func_selectedrows_gradient1() + 
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_selectedrows_gradient2(self): places = [fluid.CPUPlace()] @@ -120,9 +122,11 @@ class TestSimpleNet(unittest.TestCase): self.assertTrue(input_emb.gradient() is not None) def test_selectedrows_gradient2(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_selectedrows_gradient2() self.func_selectedrows_gradient2() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py index 3a8a3a96e9a..33ec0df46cd 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py @@ -209,11 +209,13 @@ class TestDygraphTripleGrad(TestCase): self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected)) def test_all_cases(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.func_exception() self.func_example_with_gradient_and_create_graph() with _test_eager_guard(): self.func_exception() self.func_example_with_gradient_and_create_graph() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) class TestDygraphTripleGradBradcastCase(TestCase): @@ -298,9 +300,11 @@ class TestDygraphTripleGradBradcastCase(TestCase): self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected)) def test_all_cases(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) self.func_example_with_gradient_and_create_graph() with _test_eager_guard(): self.func_example_with_gradient_and_create_graph() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_op_function_generator.py b/python/paddle/fluid/tests/unittests/test_op_function_generator.py index c712b5db0f3..eeaf1a012ad 100644 --- a/python/paddle/fluid/tests/unittests/test_op_function_generator.py +++ b/python/paddle/fluid/tests/unittests/test_op_function_generator.py @@ -74,6 +74,7 @@ class TestVariable(unittest.TestCase): self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) def test_trace_backward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with fluid.dygraph.guard(): a = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) b = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) @@ -90,6 +91,7 @@ class TestVariable(unittest.TestCase): self.assertTrue(np.array_equal(x_grad, loss.gradient() * b)) self.assertTrue(np.array_equal(y_grad, loss.gradient() * a)) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_traced_layer(self): if in_dygraph_mode(): diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 8f9801780cd..098999501bc 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -20,6 +20,7 @@ import unittest import numpy as np import paddle +import paddle.fluid as fluid from paddle.fluid.layer_helper import LayerHelper from functools import reduce from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph @@ -1014,9 +1015,11 @@ class TestBackward(unittest.TestCase): self.assertTrue((0 == x.grad[0, :, 0, 0]).all()) def test_dynamic(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): 
self.func_test_dynamic() self.func_test_dynamic() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) class TestGradientTruncated(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py index f22d48ae92b..cc917e1ab42 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py @@ -16,12 +16,14 @@ from __future__ import print_function import unittest import numpy as np import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard import copy class TestSparseBatchNorm(unittest.TestCase): def test(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): paddle.seed(0) channels = 4 @@ -59,6 +61,7 @@ class TestSparseBatchNorm(unittest.TestCase): sparse_x.grad.values().flatten().numpy(), atol=1e-5, rtol=1e-5) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_error_layout(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py index f3337bce911..85afe10349e 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py @@ -17,6 +17,7 @@ import unittest from typing import Union, Callable import numpy as np import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard from paddle import _C_ops @@ -42,6 +43,7 @@ class TestSparseUnary(unittest.TestCase): return np.allclose(dense_numpy[mask], sparse_tensor.to_dense().numpy()[mask]) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): dense_x = paddle.to_tensor( x, dtype="float32", stop_gradient=not test_gradient) @@ -59,6 +61,7 @@ class TestSparseUnary(unittest.TestCase): dense_out.backward(dense_out) sparse_out.backward(sparse_out) assert tensor_allclose(dense_x.grad, sparse_x.grad) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_sparse_relu(self): x = [[0, -1, 0, 2], [0, 0, -3, 0], [4, 5, 0, 0]] diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 5a5af059951..31f4092666e 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import numpy as np import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard @@ -151,6 +152,7 @@ class TestSparseConvert(unittest.TestCase): out_grad.to_dense().numpy()) def test_coo_to_dense(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] values = [1.0, 2.0, 3.0, 4.0, 5.0] @@ -179,6 +181,7 @@ class TestSparseConvert(unittest.TestCase): dense_tensor_cpu.backward(paddle.to_tensor(out_grad)) assert np.array_equal(correct_x_grad, sparse_x_cpu.grad.values().numpy()) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_to_sparse_csr(self): with _test_eager_guard(): @@ -196,6 +199,7 @@ class TestSparseConvert(unittest.TestCase): assert np.array_equal(dense_tensor.numpy(), x) def test_coo_values_grad(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": 
True}) with _test_eager_guard(): indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] values = [1.0, 2.0, 3.0, 4.0, 5.0] @@ -223,6 +227,7 @@ class TestSparseConvert(unittest.TestCase): # test coo_values_grad values_tensor.backward(paddle.to_tensor(out_grad)) assert np.array_equal(out_grad, sparse_x.grad.values().numpy()) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_sparse_coo_tensor_grad(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py index 2f43f129978..4a34b2022b9 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py @@ -78,9 +78,11 @@ class TensorFill_Test(unittest.TestCase): self.assertEqual((y.grad.numpy() == 0).all().item(), True) def test_tensor_fill_backward(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_test_tensor_fill_backward() self.func_test_tensor_fill_backward() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_test_errors(self): def test_list(): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py index ca0c97adedb..2b6d3a5ca5f 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py @@ -52,9 +52,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): True) def test_dim2_normal(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2_normal() self.func_dim2_normal() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_offset(self): expected_np = np.array( @@ -87,9 +89,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): True) def test_offset(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_offset() self.func_offset() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_bool(self): expected_np = np.array( @@ -150,9 +154,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): True) def test_dim2_unnormal_wrap(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2_unnormal_wrap() self.func_dim2_unnormal_wrap() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim2_unnormal_unwrap(self): expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2], @@ -187,9 +193,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): True) def test_dim2_unnormal_unwrap(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2_unnormal_unwrap() self.func_dim2_unnormal_unwrap() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim_larger2_normal(self): expected_np = np.array([[[1, 2, 2], [2, 2, 2], [2, 2, 2]], [[2, 2, 2], [ @@ -225,9 +233,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): True) def test_dim_larger2_normal(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim_larger2_normal() self.func_dim_larger2_normal() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py 
index 47316809189..c140b61bdf4 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py @@ -28,6 +28,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.places.append(fluid.CUDAPlace(0)) def test_dim2(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) expected_np = np.array( [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]).astype('float32') expected_grad = np.array( @@ -53,8 +54,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.assertEqual( (y.grad.numpy().astype('float32') == expected_grad).all(), True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_dim2_offset_1(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) expected_np = np.array( [[2, 2, 2], [1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype('float32') expected_grad = np.array( @@ -80,8 +83,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.assertEqual( (y.grad.numpy().astype('float32') == expected_grad).all(), True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_dim2_offset1(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) expected_np = np.array( [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype('float32') expected_grad = np.array( @@ -107,8 +112,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.assertEqual( (y.grad.numpy().astype('float32') == expected_grad).all(), True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_dim4(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) expected_np = np.array( [[[[0, 3], [2, 2], [2, 2]], [[2, 2], [1, 4], [2, 2]], [[2, 2], [2, 2], [2, 5]], [[2, 2], [2, 2], [2, 2]]], @@ -143,6 +150,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.assertEqual( (y.grad.numpy().astype('float32') == expected_grad).all(), True) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def test_largedim(self): if len(self.places) > 1: diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py index 81ec1daa669..0bab3ec10d7 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py @@ -56,9 +56,11 @@ class TensorFillDiagTensor_Test(unittest.TestCase): True) def test_dim2(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2() self.func_dim2() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim2_offset_1(self): expected_np = np.array( @@ -88,9 +90,11 @@ class TensorFillDiagTensor_Test(unittest.TestCase): True) def test_dim2_offset_1(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2_offset_1() self.func_dim2_offset_1() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim2_offset1(self): expected_np = np.array( @@ -120,9 +124,11 @@ class TensorFillDiagTensor_Test(unittest.TestCase): True) def test_dim2_offset1(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim2_offset1() self.func_dim2_offset1() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim4(self): expected_np = np.array( @@ -161,9 +167,11 @@ class TensorFillDiagTensor_Test(unittest.TestCase): True) def 
test_func_dim4(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_dim4() self.func_dim4() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_largedim(self): #large dim only test on gpu because the cpu version is too slow for ci test, and the memory is limited @@ -190,9 +198,11 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.assertEqual((y.grad == expected_grad).all(), True) def test_largedim(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_largedim() self.func_largedim() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index e7f85f0451a..d725a672c34 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -158,9 +158,11 @@ class TestTensorRegisterHook(unittest.TestCase): run_print_hook_for_interior_var(print_hook, removed=True) def test_hook_for_interior_var(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_hook_for_interior_var() self.func_hook_for_interior_var() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_leaf_var(self): def run_double_hook_for_leaf_var(double_hook, removed=False): @@ -202,9 +204,11 @@ class TestTensorRegisterHook(unittest.TestCase): run_double_hook_for_leaf_var(lambda grad: grad * 2, removed=True) def test_hook_for_leaf_var(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_hook_for_leaf_var() self.func_hook_for_leaf_var() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_accumulated_grad_interior_var(self): def run_double_hook_for_accumulated_grad_interior_var(double_hook, @@ -262,9 +266,11 @@ class TestTensorRegisterHook(unittest.TestCase): lambda grad: grad * 2, removed=True) def test_hook_for_accumulated_grad_interior_var(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_hook_for_accumulated_grad_interior_var() self.func_hook_for_accumulated_grad_interior_var() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_accumulated_grad_leaf_var(self): def run_double_hook_for_accumulated_grad_leaf_var(double_hook, @@ -360,9 +366,11 @@ class TestTensorRegisterHook(unittest.TestCase): self.assertTrue(np.array_equal(linear1_b_grad, linear1_b_grad_rm)) def test_func_hook_in_model(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_hook_in_model() self.func_hook_in_model() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_multiple_hooks_for_interior_var(self): def run_multiple_hooks_for_interior_var(device, @@ -443,9 +451,11 @@ class TestTensorRegisterHook(unittest.TestCase): self.assertTrue(np.array_equal(y_grad, z)) def test_multiple_hooks_for_interior_var(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): self.func_multiple_hooks_for_interior_var() self.func_multiple_hooks_for_interior_var() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_in_double_grad(self): def double_print_hook(grad): diff --git 
a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py index ec3aeb24239..e7c88dd8398 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py @@ -158,6 +158,8 @@ class TestUniformRandomInplaceGrad(unittest.TestCase): self.shape = (1000, 784) def test_uniform_random_inplace_grad(self): + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + def test_grad(): tensor_a = paddle.ones(self.shape) tensor_a.stop_gradient = False @@ -174,6 +176,7 @@ class TestUniformRandomInplaceGrad(unittest.TestCase): for place in places: paddle.set_device(place) test_grad() + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) if __name__ == '__main__': -- GitLab
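
A minimal sketch of the test pattern this patch applies throughout the files above (not part of the patch itself): with FLAGS_retain_grad_for_all_tensor now defaulting to false, eager-mode code that inspects the .grad of an intermediate (non-leaf) tensor enables the flag beforehand and restores the default afterwards, exactly as the modified tests do. The tensor names below are illustrative only.

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    # Enable gradient retention for intermediate tensors (the default is now False).
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

    x = paddle.to_tensor(np.ones([2, 2], dtype="float32"), stop_gradient=False)
    y = x * 2                  # intermediate (non-leaf) tensor
    out = paddle.sum(y)
    out.backward()

    # y.grad is expected to be populated only because the flag was enabled above.
    assert y.grad is not None

    # Restore the default so later code observes the new behavior.
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})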