diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
index 8d8046f19aa79f83ccb1f8757a3099b03b7ec61d..833993c621612f28a3acc895b544e9cc084b4486 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
@@ -21,7 +21,6 @@ import numpy as np
 import paddle
 import paddle.static as static
-from paddle import fluid
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 from paddle.vision.transforms import Compose, Normalize
@@ -146,8 +145,10 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -259,7 +260,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         )
 
     def test_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -286,7 +286,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         x_grad, pd_x_grad
                     ),
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_static_save_and_load_inference_model(self):
         paddle.enable_static()
@@ -354,7 +353,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         paddle.disable_static()
 
     def test_double_grad_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -380,7 +378,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         dx_grad, pd_dx_grad
                    ),
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_with_dataloader(self):
         for device in self.devices:
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
index 7b251e8063a05e7d4a09238feaf1efef04739fe4..ef0f52d5c3f2dafbc6a480aa2c1497c87b793666 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
@@ -30,8 +30,10 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     t = paddle.to_tensor(np_x, dtype=dtype)
     t.stop_gradient = False
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     out.stop_gradient = False
     out.backward()
@@ -142,14 +144,14 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
 
 def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
-    import paddle.fluid as fluid
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -164,7 +166,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
         grad_outputs=paddle.ones_like(t),
         create_graph=False,
     )
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     assert ddout[0].numpy() is not None
     return dx[0].numpy(), ddout[0].numpy()
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
index bedaf36832f91ee600ea7f789ea0ff6b73366a78..ad139e84a90c4dedd49d28567ed9cb83f62ff7b5 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
-import paddle.fluid as fluid
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
@@ -41,24 +40,25 @@ custom_ops = load(
 
 def custom_tanh_double_grad_dynamic(func, device, dtype, np_x):
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t)
     out.stop_gradient = False
+    out.retain_grads()
     dx = paddle.grad(
         outputs=[out], inputs=[t], create_graph=True, retain_graph=True
     )
+    dx[0].retain_grads()
     dx[0].backward()
     assert out.grad is not None
     assert dx[0].grad is not None
     return dx[0].numpy(), dx[0].grad.numpy(), out.grad.numpy()
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 class TestCustomTanhDoubleGradJit(unittest.TestCase):
@@ -68,7 +68,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
         self.devices = ['cpu']
 
     def test_double_grad_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
@@ -102,7 +101,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
                         dout, pd_dout
                     ),
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_op_setup.py b/python/paddle/fluid/tests/custom_runtime/test_custom_op_setup.py
index 969fdb2f8a6b2229d77c805fcf977ee6dbcc926a..b347ee139728abf9029a34af605d966285301d27 100644
--- a/python/paddle/fluid/tests/custom_runtime/test_custom_op_setup.py
+++ b/python/paddle/fluid/tests/custom_runtime/test_custom_op_setup.py
@@ -24,11 +24,11 @@ import numpy as np
 
 def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     import paddle
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype)
     t.stop_gradient = False
+    t.retain_grads()
     sys.stdout.flush()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
@@ -36,7 +36,6 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     out.backward()
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     if t.grad is None:
         return out.numpy(), t.grad
     else:
@@ -105,11 +104,12 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
     import paddle
     paddle.set_device(device)
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -125,7 +125,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
         create_graph=False,
     )
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     assert ddout[0].numpy() is not None
     return dx[0].numpy(), ddout[0].numpy()
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_alltoall_single.py b/python/paddle/fluid/tests/unittests/collective/collective_alltoall_single.py
index dea1d1ee2d9e1f71a175d664864aac4493eefb51..13ebddbd786da06a5b7930b533db864f69d4b937 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_alltoall_single.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_alltoall_single.py
@@ -30,8 +30,6 @@ class TestCollectiveAllToAllSingle(unittest.TestCase):
             paddle.distributed.is_initialized()
         ), "The distributed environment has been initialized."
 
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
-
     def test_collective_alltoall_single(self):
         rank = dist.get_rank()
         size = dist.get_world_size()
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_batch_isend_irecv.py b/python/paddle/fluid/tests/unittests/collective/collective_batch_isend_irecv.py
index ec3b2ad5e4a4b622c111e857dcda5ce73e97d574..13dbd974a1bca81746fbd2e19b7950740409a0c2 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_batch_isend_irecv.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_batch_isend_irecv.py
@@ -23,7 +23,6 @@ import paddle.distributed as dist
 class TestCollectiveBatchIsendIrecv(unittest.TestCase):
     def setUp(self):
         dist.init_parallel_env()
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
 
     def test_collective_batch_isend_irecv(self):
         rank = dist.get_rank()
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_reduce_scatter.py b/python/paddle/fluid/tests/unittests/collective/collective_reduce_scatter.py
index c2cf243ee02cd7edda79a14a8c578fa825a34790..7017237cacd1e5be5f4e304270c5521233d73962 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_reduce_scatter.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_reduce_scatter.py
@@ -24,7 +24,6 @@ from paddle.distributed.communication.reduce_scatter import _reduce_scatter_base
 class TestCollectiveReduceScatter(unittest.TestCase):
     def setUp(self):
         dist.init_parallel_env()
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
 
     def test_collective_reduce_scatter_sum(self):
         rank = dist.get_rank()
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_margin_cross_entropy.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_margin_cross_entropy.py
index 78e119fa8fb424693e5cc28d16170a535af4541b..97dd4e39395914d83ac559f3335967fb25743395 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_margin_cross_entropy.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_margin_cross_entropy.py
@@ -34,7 +34,6 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         fleet.init(is_collective=True, strategy=strategy)
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
 
     def test_parallel_margin_softmax_cross_entropy(self):
         margin1s = [1.0, 1.0, 1.35]
@@ -93,6 +92,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
             norm_weight = paddle.divide(weight, weight_l2)
             data = paddle.matmul(norm_input, norm_weight)
+            data.retain_grads()
             data.stop_gradient = False
             sta = (
@@ -118,6 +118,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
                 group=check_group,
             )
             integral_data = integral_data.detach().clone()
+            integral_data.retain_grads()
             integral_data.stop_gradient = False
             # add arcface margin to logit
diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
index 5026ae9fc96d478e78d5596a22a507c07dde18b1..48349cfe910b35f35e1e19399c39bd40a0b63726 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -96,11 +96,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.tanh_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -128,11 +126,9 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.tanh_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -160,11 +156,9 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.abs_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -256,11 +250,9 @@ class TestELUDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.elu_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -292,11 +284,9 @@ class TestCELUDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.celu_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -390,11 +380,9 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.square_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -424,11 +412,9 @@ class TestLogDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.log_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -456,11 +442,9 @@ class TestSinDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sin_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -488,11 +472,9 @@ class TestCosDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.cos_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py
index 078811b4969995f7db2fb112f10ab63a2404b881..02f649b39bfab231fe0691c39b217e017ebf3a9a 100644
--- a/python/paddle/fluid/tests/unittests/test_assign_op.py
+++ b/python/paddle/fluid/tests/unittests/test_assign_op.py
@@ -37,16 +37,12 @@ class TestAssignOp(op_test.OpTest):
     def test_forward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_output(check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
 
     def test_backward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_grad(['X'], 'Out', check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
@@ -60,23 +56,18 @@ class TestAssignFP16Op(op_test.OpTest):
     def test_forward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_output(check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
 
     def test_backward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_grad(['X'], 'Out', check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
 
 class TestAssignOpWithLoDTensorArray(unittest.TestCase):
     def test_assign_LoDTensorArray(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         main_program = Program()
         startup_program = Program()
         with program_guard(main_program):
@@ -92,7 +83,6 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase):
             sums = paddle.tensor.array_read(array=init_array, i=i)
             mean = paddle.mean(sums)
             append_backward(mean)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
         place = (
             fluid.CUDAPlace(0)
@@ -207,12 +197,13 @@ class TestAssignOApi(unittest.TestCase):
         np.testing.assert_allclose(result3.numpy(), np.array([1]), rtol=1e-05)
 
     def test_clone(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.python_api = paddle.clone
         x = paddle.ones([2])
         x.stop_gradient = False
+        x.retain_grads()
         clone_x = paddle.clone(x)
+        clone_x.retain_grads()
         y = clone_x**3
         y.backward()
@@ -220,7 +211,6 @@ class TestAssignOApi(unittest.TestCase):
         np.testing.assert_array_equal(x, [1, 1])
         np.testing.assert_array_equal(clone_x.grad.numpy(), [3, 3])
         np.testing.assert_array_equal(x.grad.numpy(), [3, 3])
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
         paddle.enable_static()
         with program_guard(Program(), Program()):
@@ -241,7 +231,6 @@ class TestAssignOpErrorApi(unittest.TestCase):
     def test_errors(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with program_guard(Program(), Program()):
             # The type of input must be Variable or numpy.ndarray.
             x1 = fluid.create_lod_tensor(
@@ -251,7 +240,6 @@ class TestAssignOpErrorApi(unittest.TestCase):
             # When the type of input is numpy.ndarray, the dtype of input must be float32, int32.
             x2 = np.array([[2.5, 2.5]], dtype='uint8')
             self.assertRaises(TypeError, paddle.assign, x2)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
 
     def test_type_error(self):
@@ -281,7 +269,6 @@ class TestAssignDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.assign_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -313,7 +300,6 @@ class TestAssignTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.assign_wrapper, [data], out, x_init=[data_arr], place=place
         )
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
index 3f1283864dfbe047fabe9fb5cfd08b23bdb64f7a..63322b3f6d868f1f89557be4e4b38bc4b838df69 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
@@ -166,6 +166,7 @@ class TestDygraphTripleGrad(TestCase):
     @dygraph_guard
     def func_example_with_gradient_and_create_graph(self):
         x = random_var(self.shape)
+        x.retain_grads()
         x_np = x.numpy()
         x.stop_gradient = False
@@ -222,10 +223,8 @@ class TestDygraphTripleGrad(TestCase):
         np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
 
     def test_all_cases(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.func_exception()
         self.func_example_with_gradient_and_create_graph()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 class TestDygraphTripleGradBradcastCase(TestCase):
@@ -259,6 +258,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
     @dygraph_guard
     def func_example_with_gradient_and_create_graph(self):
         x = random_var(self.x_shape)
+        x.retain_grads()
         x_np = x.numpy()
         x.stop_gradient = False
@@ -316,9 +316,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
         np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
 
     def test_all_cases(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.func_example_with_gradient_and_create_graph()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 # d_ddout is none, dtype is float32
diff --git a/python/paddle/fluid/tests/unittests/test_op_function_generator.py b/python/paddle/fluid/tests/unittests/test_op_function_generator.py
index eff73a4548f0eec2c67661a3332611ea25e9ab58..54356f4f8e999d357f10eab994b50da54fe5283b 100644
--- a/python/paddle/fluid/tests/unittests/test_op_function_generator.py
+++ b/python/paddle/fluid/tests/unittests/test_op_function_generator.py
@@ -72,7 +72,6 @@ class TestVariable(unittest.TestCase):
         np.testing.assert_array_equal(res1.numpy(), res2.numpy())
 
     def test_trace_backward(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             a = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
             b = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
             x = fluid.dygraph.to_variable(a)
             y = fluid.dygraph.to_variable(b)
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             loss = _legacy_C_ops.elementwise_mul(x, y)
+            loss.retain_grads()
             loss.backward()
             x_grad = x.gradient()
@@ -89,7 +91,6 @@ class TestVariable(unittest.TestCase):
             np.testing.assert_array_equal(x_grad, loss.gradient() * b)
             np.testing.assert_array_equal(y_grad, loss.gradient() * a)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py
index 887ce9ff3f7411bb01115e8d648b53be0ec7de31..0fb23bf73d58e11e65c99c57bf9dec05a9b5838c 100755
--- a/python/paddle/fluid/tests/unittests/test_reshape_op.py
+++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py
@@ -505,17 +505,18 @@ class TestReshapeZeroTensor(unittest.TestCase):
 
 class TestReshapeAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.reshape(x, [1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1, 1])
@@ -524,6 +525,7 @@ class TestReshapeAPI_ZeroDim(unittest.TestCase):
         x = paddle.rand([1])
         x.stop_gradient = False
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1])
         self.assertEqual(out.shape, [])
diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py
index af76b09047bce619ae6bfadd7dcdccf5ee807f6a..12838b218b43eadb6c2d8e45cfde4c7094a33a2d 100644
--- a/python/paddle/fluid/tests/unittests/test_slice_op.py
+++ b/python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -895,7 +895,6 @@ class TestSliceDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.slice_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -931,7 +930,6 @@ class TestSliceTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.slice_wrapper, [data], out, x_init=[data_arr], place=place
         )
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py b/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py
index e2a98b170e91ca013a1756b82460ec05bd53f826..96ea87dd1b9e19559655e5ef0970f5b2fe38153a 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py
@@ -43,7 +43,6 @@ class TestSparseElementWiseAPI(unittest.TestCase):
     """
 
     def setUp(self):
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         np.random.seed(2022)
         self.op_list = op_list
         self.csr_shape = [128, 256]
@@ -109,7 +108,9 @@ class TestSparseElementWiseAPI(unittest.TestCase):
                 y, dtype=dtype, stop_gradient=False
             )
             coo_x = s_dense_x.to_sparse_coo(sparse_dim)
+            coo_x.retain_grads()
             coo_y = s_dense_y.to_sparse_coo(sparse_dim)
+            coo_y.retain_grads()
             actual_res = get_actual_res(coo_x, coo_y, op)
             actual_res.backward(actual_res)
@@ -157,9 +158,12 @@ class TestSparseElementWiseAPI(unittest.TestCase):
         sp_a = sparse.sparse_coo_tensor(
             indices_data, values1_data, shape, stop_gradient=False
         )
+        sp_a.retain_grads()
+
         sp_b = sparse.sparse_coo_tensor(
             indices_data, values2_data, shape, stop_gradient=False
         )
+        sp_b.retain_grads()
         values1 = paddle.to_tensor(values1_data, stop_gradient=False)
         values2 = paddle.to_tensor(values2_data, stop_gradient=False)
@@ -185,6 +189,7 @@ class TestSparseElementWiseAPI(unittest.TestCase):
         sp_a = sparse.sparse_coo_tensor(
             indices_data, values_data, shape, stop_gradient=False
         )
+        sp_a.retain_grads()
         bias_values = [1.0, 2.0]
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
index 5cd625770ba7ec0061864ab82a72b7e2f88379d0..5d6d83d6586e74037cc327d4694fed2029f9f0b8 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
@@ -17,7 +17,6 @@ import unittest
 import numpy as np
 
 import paddle
-import paddle.fluid as fluid
 import paddle.fluid.core as core
 
 devices = ['cpu', 'gpu']
@@ -148,7 +147,6 @@ class TestSparseConvert(unittest.TestCase):
         assert np.array_equal(dense_x.grad.numpy(), out_grad.to_dense().numpy())
 
     def test_coo_to_dense(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
         values = [1.0, 2.0, 3.0, 4.0, 5.0]
         indices_dtypes = ['int32', 'int64']
@@ -159,6 +157,7 @@ class TestSparseConvert(unittest.TestCase):
                 shape=[3, 4],
                 stop_gradient=False,
             )
+            sparse_x.retain_grads()
            dense_tensor = sparse_x.to_dense()
             # test to_dense_grad backward
             out_grad = [
@@ -180,12 +179,12 @@ class TestSparseConvert(unittest.TestCase):
                 shape=[3, 4],
                 stop_gradient=False,
             )
+            sparse_x_cpu.retain_grads()
             dense_tensor_cpu = sparse_x_cpu.to_dense()
             dense_tensor_cpu.backward(paddle.to_tensor(out_grad))
             assert np.array_equal(
                 correct_x_grad, sparse_x_cpu.grad.values().numpy()
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_to_sparse_csr(self):
         x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]]
@@ -202,7 +201,6 @@ class TestSparseConvert(unittest.TestCase):
         assert np.array_equal(dense_tensor.numpy(), x)
 
     def test_coo_values_grad(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
         values = [1.0, 2.0, 3.0, 4.0, 5.0]
         sparse_x = paddle.sparse.sparse_coo_tensor(
@@ -211,6 +209,7 @@ class TestSparseConvert(unittest.TestCase):
             shape=[3, 4],
             stop_gradient=False,
         )
+        sparse_x.retain_grads()
         values_tensor = sparse_x.values()
         out_grad = [2.0, 3.0, 5.0, 8.0, 9.0]
         # test coo_values_grad
@@ -230,6 +229,7 @@ class TestSparseConvert(unittest.TestCase):
             shape=[3, 4, 2],
             stop_gradient=False,
         )
+        sparse_x.retain_grads()
         values_tensor = sparse_x.values()
         out_grad = [
             [2.0, 2.0],
@@ -241,7 +241,6 @@ class TestSparseConvert(unittest.TestCase):
         # test coo_values_grad
         values_tensor.backward(paddle.to_tensor(out_grad))
         assert np.array_equal(out_grad, sparse_x.grad.values().numpy())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_sparse_coo_tensor_grad(self):
         for device in devices:
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
index a83f5b8e5aa0b15731d338643db310537022305d..e4e12f4387d3264af38dd433e7f43c387a871fd8 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
@@ -22,7 +22,6 @@ import paddle.fluid as fluid
 
 class TensorFillDiagonal_Test(unittest.TestCase):
     def test_dim2_normal(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype(
             'float32'
         )
@@ -44,6 +43,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 x = paddle.ones((3, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_(1, offset=0, wrap=True)
                 loss = y.sum()
                 loss.backward()
@@ -55,10 +55,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_offset(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array([[2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype(
             'float32'
         )
@@ -80,6 +78,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 x = paddle.ones((3, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_(1, offset=2, wrap=True)
                 loss = y.sum()
                 loss.backward()
@@ -91,7 +90,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_bool(self):
         expected_np = np.array(
@@ -116,7 +114,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
         self.assertEqual((x.numpy() == expected_np).all(), True)
 
     def test_dim2_unnormal_wrap(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [1, 2, 2],
@@ -154,6 +151,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 x = paddle.ones((7, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_(1, offset=0, wrap=True)
                 loss = y.sum()
                 loss.backward()
@@ -165,10 +163,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim2_unnormal_unwrap(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [1, 2, 2],
@@ -206,6 +202,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 x = paddle.ones((7, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_(1, offset=0, wrap=False)
                 loss = y.sum()
                 loss.backward()
@@ -217,10 +214,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim_larger2_normal(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [[1, 2, 2], [2, 2, 2], [2, 2, 2]],
@@ -250,6 +245,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 x = paddle.ones((3, 3, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_(1, offset=0, wrap=True)
                 loss = y.sum()
                 loss.backward()
@@ -261,7 +257,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py
index 5e2756a8d24248ae6b78b0e52934ed6e718d0c43..419c142b6df5ff54909d62e649471741b48666bd 100644
--- a/python/paddle/fluid/tests/unittests/test_tile_op.py
+++ b/python/paddle/fluid/tests/unittests/test_tile_op.py
@@ -286,7 +286,6 @@ class TestTileDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.tile_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -318,7 +317,6 @@ class TestTileTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.tile_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -335,24 +333,26 @@ class TestTileTripleGradCheck(unittest.TestCase):
 
 class TestTileAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.tile(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [])
         out = paddle.tile(x, [3])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [3])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [3])
         out = paddle.tile(x, [2, 3])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [2, 3])
         self.assertEqual(x.grad.shape, [])
diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
index c99e7e0712929eb7d14624406423ac39df62cbf9..eedf4ae596cab6cc26aa279427cf1ce188bcdb05 100644
--- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py
@@ -21,8 +21,6 @@ import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
 
-fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
-
 unary_api_list = [
     paddle.nn.functional.elu,
     paddle.nn.functional.gelu,
@@ -102,7 +100,9 @@ class TestUnaryAPI(unittest.TestCase):
         for api in unary_api_list:
             x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             out = api(x)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -202,7 +202,9 @@ class TestReduceAPI(unittest.TestCase):
             else:
                 x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             out = api(x, None)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -291,12 +293,16 @@ class TestBinaryAPI(unittest.TestCase):
             y = paddle.rand([])
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             if isinstance(api, dict):
                 out = api['func'](x, y)
                 out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -312,12 +318,16 @@ class TestBinaryAPI(unittest.TestCase):
             y = paddle.rand([])
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             if isinstance(api, dict):
                 out = api['func'](x, y)
                 out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [2, 3, 4])
@@ -331,6 +341,8 @@ class TestBinaryAPI(unittest.TestCase):
             # 3) x is 0D , y is ND
             x = paddle.rand([])
             y = paddle.rand([2, 3, 4])
+            x.retain_grads()
+            y.retain_grads()
             x.stop_gradient = False
             y.stop_gradient = False
             if isinstance(api, dict):
@@ -339,6 +351,8 @@ class TestBinaryAPI(unittest.TestCase):
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -352,9 +366,11 @@ class TestBinaryAPI(unittest.TestCase):
             # 4) x is 0D , y is scalar
             x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             y = 0.5
             if isinstance(api, dict):
                 out = getattr(paddle.Tensor, api['cls_method'])(x, y)
+                out.retain_grads()
                 out.backward()
                 self.assertEqual(x.shape, [])
@@ -528,7 +544,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_flip(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.flip(x, axis=[])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.shape, [])
         self.assertEqual(out.shape, [])
@@ -618,7 +636,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_pow_factor(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.pow(x, 2.0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -628,7 +648,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_cast(self):
         x = paddle.full([], 1.0, 'float32')
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.cast(x, 'int32')
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -638,7 +660,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_cumprod(self):
         x = paddle.full([], 1.0, 'float32')
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.cumprod(x, 0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -651,7 +675,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_clip(self):
         x = paddle.uniform([], None, -10, 10)
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.clip(x, -5, 5)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -661,7 +687,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_increment(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.increment(x, 1.0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -694,8 +722,10 @@ class TestSundryAPI(unittest.TestCase):
     def test_gather_1D(self):
         x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
+        x.retain_grads()
         index = paddle.full([], 2, 'int64')
         out = paddle.gather(x, index)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -707,8 +737,10 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.to_tensor(
             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
         )
+        x.retain_grads()
         index = paddle.full([], 1, 'int64')
         out = paddle.gather(x, index)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [3])
@@ -720,8 +752,10 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.to_tensor(
             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
         )
+        x.retain_grads()
         index = paddle.full([], 1, 'int64')
         out = paddle.gather(x, index, axis=1)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [2])
@@ -731,9 +765,11 @@ class TestSundryAPI(unittest.TestCase):
     def test_scatter_1D(self):
         x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
+        x.retain_grads()
         index = paddle.full([], 2, 'int64')
         updates = paddle.full([], 4.0)
         out = paddle.scatter(x, index, updates)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [5])
@@ -747,6 +783,7 @@ class TestSundryAPI(unittest.TestCase):
         index = paddle.full([], 1, 'int64')
         updates = paddle.to_tensor([1.0, 2.0, 3.0])
         out = paddle.scatter(x, index, updates)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [2, 3])
@@ -762,10 +799,18 @@ class TestSundryAPI(unittest.TestCase):
         x2.stop_gradient = False
         x3.stop_gradient = False
 
+        x1.retain_grads()
+        x2.retain_grads()
+        x3.retain_grads()
+
         out1 = paddle.diagflat(x1, 1)
         out2 = paddle.diagflat(x2, -1)
         out3 = paddle.diagflat(x3, 0)
 
+        out1.retain_grads()
+        out2.retain_grads()
+        out3.retain_grads()
+
         out1.backward()
         out2.backward()
         out3.backward()
@@ -800,8 +845,11 @@ class TestSundryAPI(unittest.TestCase):
     def test_scatter_nd(self):
         index = paddle.to_tensor([3], dtype="int64")
         updates = paddle.full([], 2, dtype='float32')
+        updates.retain_grads()
         updates.stop_gradient = False
+
         out = paddle.scatter_nd(index, updates, [5])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [5])
@@ -818,6 +866,7 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.randn(())
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.kthvalue(x, 1)
         out[0].backward()
@@ -838,6 +887,7 @@ class TestSundryAPI(unittest.TestCase):
             paddle.set_device(place)
 
             x = paddle.randn(())
+            x.retain_grads()
             x.stop_gradient = False
             out = paddle.mode(x)
@@ -854,11 +904,13 @@ class TestSundryAPI(unittest.TestCase):
     def test_flatten(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         start_axis = 0
         stop_axis = -1
         out = paddle.flatten(x, start_axis=start_axis, stop_axis=stop_axis)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1])
@@ -868,7 +920,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_scale(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.scale(x, scale=2.0, bias=1.0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -911,6 +965,9 @@ class TestSundryAPI(unittest.TestCase):
         out1 = paddle.add_n(x1)
         out2 = paddle.add_n([x2, x3])
+        out1.retain_grads()
+        out2.retain_grads()
+
         out1.backward()
         out2.backward()
@@ -928,26 +985,31 @@ class TestSundryAPI(unittest.TestCase):
     def test_reshape_list(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [])
         self.assertEqual(out.grad.shape, [])
         out = paddle.reshape(x, [1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1, 1])
@@ -955,9 +1017,11 @@ class TestSundryAPI(unittest.TestCase):
     def test_reshape_tensor(self):
         x = paddle.rand([1, 1])
+        x.retain_grads()
         x.stop_gradient = False
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [])
@@ -965,6 +1029,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = paddle.to_tensor([1, 1, 1], "int32")
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1, 1, 1])
@@ -972,6 +1037,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = paddle.to_tensor([-1], "int32")
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1])
@@ -979,6 +1045,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")]
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1, 1])
@@ -1019,6 +1086,7 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.reverse(x, axis=[])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.shape, [])
         self.assertEqual(out.shape, [])
@@ -1029,9 +1097,14 @@ class TestSundryAPI(unittest.TestCase):
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
         out1 = paddle.sort(x1, axis=-1)
         out2 = paddle.sort(x2, axis=0)
+        out1.retain_grads()
+        out2.retain_grads()
+
         out1.backward()
         out2.backward()
@@ -1051,9 +1124,15 @@ class TestSundryAPI(unittest.TestCase):
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
+
         out1 = paddle.argsort(x1, axis=-1)
         out2 = paddle.argsort(x2, axis=0)
+        out1.retain_grads()
+        out2.retain_grads()
+
         out1.backward()
         out2.backward()
@@ -1075,6 +1154,7 @@ class TestSundryAPI(unittest.TestCase):
         w0 = paddle.rand([])
         x0.stop_gradient = False
         y0.stop_gradient = False
+        y0.retain_grads()
         out0 = paddle.lerp(x0, y0, w0)
         out0.backward()
@@ -1089,6 +1169,8 @@ class TestSundryAPI(unittest.TestCase):
         w1 = paddle.rand([])
         x1.stop_gradient = False
         y1.stop_gradient = False
+        x1.retain_grads()
+        y1.retain_grads()
         out1 = paddle.lerp(x1, y1, w1)
         out1.backward()
@@ -1103,6 +1185,8 @@ class TestSundryAPI(unittest.TestCase):
         w2 = paddle.rand([])
         x2.stop_gradient = False
         y2.stop_gradient = False
+        x2.retain_grads()
+        y2.retain_grads()
         out2 = paddle.lerp(x2, y2, w2)
         out2.backward()
@@ -1120,6 +1204,7 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.randn(())
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.repeat_interleave(x, 2, None)
         out.backward()
@@ -1145,6 +1230,7 @@ class TestSundryAPI(unittest.TestCase):
             dtype='float32',
             stop_gradient=False,
         )
+        logit.retain_grads()
         label = paddle.to_tensor(
             [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype='float32'
         )
@@ -1153,6 +1239,7 @@ class TestSundryAPI(unittest.TestCase):
         out0 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_0)
         out1 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_1)
+        out0.retain_grads()
         np.testing.assert_array_equal(
             out0.numpy(),
@@ -1173,7 +1260,10 @@ class TestSundryAPI(unittest.TestCase):
         x2 = paddle.full([], 2)
         x1.stop_gradient = False
         x2.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
         out = paddle.where(x1 > x2, x1, x2)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
         self.assertEqual(out.numpy(), 2)
@@ -1186,9 +1276,12 @@ class TestSundryAPI(unittest.TestCase):
     def test_atan2(self):
         x1 = paddle.full([], 0)
         x2 = paddle.full([], 2)
+        x1.retain_grads()
+        x2.retain_grads()
         x1.stop_gradient = False
         x2.stop_gradient = False
         out = paddle.atan2(x1, x2)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
         self.assertEqual(out.numpy(), 0)
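
# The hunks above all apply the same migration: the global
# FLAGS_retain_grad_for_all_tensor switch is removed and gradients are kept on
# a per-tensor basis via Tensor.retain_grads(). A minimal, illustrative sketch
# of that pattern in Paddle dygraph mode (variable names here are hypothetical,
# not taken from any test above):
import paddle

x = paddle.rand([3])
x.stop_gradient = False   # make x participate in autograd
x.retain_grads()          # keep x.grad populated after backward()

y = x * 2                 # intermediate (non-leaf) tensor
y.retain_grads()          # without this, y.grad would not be kept

y.sum().backward()
assert x.grad is not None
assert y.grad is not None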