From 3f2f036c84c9ea88480e816b6cb99cefd3a0522f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com>
Date: Tue, 10 Jan 2023 16:53:41 +0800
Subject: [PATCH] rm retain grads flag for tests part1 (#49660)

* rm retain grads flag for tests

* modify fill_diagonal

* retain grads for fill_diagonal tests

* reset sum & concat

* fix fill_diagonal
---
 .../tests/unittests/test_activation_nn_grad.py     | 16 ----------------
 .../fluid/tests/unittests/test_concat_op.py        |  2 --
 .../fluid/tests/unittests/test_cumsum_op.py        |  2 --
 .../tests/unittests/test_elementwise_pow_op.py     |  3 +--
 .../unittests/test_imperative_auto_prune.py        |  9 +++------
 .../unittests/test_imperative_selected_rows.py     |  6 ++----
 .../fluid/tests/unittests/test_scale_op.py         |  2 --
 .../fluid/tests/unittests/test_set_value_op.py     |  3 ---
 .../fluid/tests/unittests/test_sparse_norm_op.py   |  3 +--
 .../fluid/tests/unittests/test_stack_op.py         |  2 +-
 .../paddle/fluid/tests/unittests/test_sum_op.py    |  4 ----
 .../fluid/tests/unittests/test_tensor_fill_.py     |  3 +--
 .../test_tensor_fill_diagonal_tensor.py            | 15 +++++----------
 .../test_tensor_fill_diagonal_tensor_.py           | 15 +++++----------
 .../fluid/tests/unittests/test_unsqueeze2_op.py    |  4 +++-
 15 files changed, 22 insertions(+), 67 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
index 4e4d6a08bc..5026ae9fc9 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -66,11 +66,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -521,11 +519,9 @@ class TestPowDoubleGradCheck1(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -552,11 +548,9 @@ class TestPowDoubleGradCheck2(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -584,11 +578,9 @@ class TestSinTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.sin_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -615,11 +607,9 @@ class TestPowTripleGradCheck1(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -646,11 +636,9 @@ class TestPowTripleGradCheck2(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -677,11 +665,9 @@ class TestPowTripleGradCheck3(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
@@ -709,11 +695,9 @@ class TestCosTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.cos_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_grad(self):
         paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py
index 8d5d24f324..d20b3e92c4 100644
--- a/python/paddle/fluid/tests/unittests/test_concat_op.py
+++ b/python/paddle/fluid/tests/unittests/test_concat_op.py
@@ -471,7 +471,6 @@ class TestConcatDoubleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.concat_wrapper,
             [data1, data2],
@@ -513,7 +512,6 @@ class TestConcatTripleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.concat_wrapper,
             [data1, data2],
diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py
index 4675ac9f52..4b0cae035b 100644
--- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cumsum_op.py
@@ -462,7 +462,6 @@ class TestCumsumDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.cumsum_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -494,7 +493,6 @@ class TestCumsumTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.cumsum_wrapper, [data], out, x_init=[data_arr], place=place
         )
diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py
index 6ff9c7961c..12a42b780d 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py
@@ -229,7 +229,6 @@ class TestElementwisePowGradOpInt(unittest.TestCase):
         ).astype("int")

     def test_grad(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if fluid.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -240,11 +239,11 @@ class TestElementwisePowGradOpInt(unittest.TestCase):
                 x.stop_gradient = False
                 y.stop_gradient = False
                 res = x**y
+                res.retain_grads()
                 res.backward()
                 np.testing.assert_array_equal(res.gradient(), self.grad_res)
                 np.testing.assert_array_equal(x.gradient(), self.grad_x)
                 np.testing.assert_array_equal(y.gradient(), self.grad_y)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


 class TestElementwisePowOpFP16(OpTest):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
index 679a141fc5..0637675f9b 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
@@ -193,7 +193,6 @@ class TestImperativeAutoPrune(unittest.TestCase):

     # TODO(jiabin): Support this when we support better split tensor
     def test_auto_prune3(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case3 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -201,13 +200,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case3(v1, v2, 1)
+            part2.retain_grads()
             loss.backward()
             self.assertIsNotNone(case3.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 0).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_auto_prune4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case4 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -215,13 +213,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case4(v1, v2, 1)
+            part2.retain_grads()
             part2.backward()
             self.assertIsNotNone(case4.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 1).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_auto_prune5(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case4 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -229,10 +226,10 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part1, part2 = case4(v1, v2, 2)
+            part2.retain_grads()
             part1.backward()
             self.assertIsNotNone(case4.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 0).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_auto_prune6(self):
         with fluid.dygraph.guard():
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
index 5cc7f63eb7..230d4c8ac6 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
@@ -40,7 +40,6 @@ class SimpleNet(paddle.nn.Layer):

 class TestSimpleNet(unittest.TestCase):
     def test_selectedrows_gradient1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -63,6 +62,7 @@ class TestSimpleNet(unittest.TestCase):
                     parameter_list=simplenet.parameters(),
                 )  # grad_clip=grad_clip
                 input_emb, emb = simplenet(input)
+                input_emb.retain_grads()

                 self.assertIsNone(emb.weight.gradient())
                 self.assertIsNone(input_emb.gradient())
@@ -77,10 +77,8 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb.clear_gradient()
                 self.assertIsNotNone(input_emb.gradient())
                 paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_selectedrows_gradient2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -103,6 +101,7 @@ class TestSimpleNet(unittest.TestCase):
                     grad_clip=grad_clip,
                 )
                 input_emb, emb = simplenet(input)
+                input_emb.retain_grads()

                 self.assertIsNone(emb.weight.gradient())
                 self.assertIsNone(input_emb.gradient())
@@ -116,7 +115,6 @@ class TestSimpleNet(unittest.TestCase):

                 input_emb.clear_gradient()
                 self.assertIsNotNone(input_emb.gradient())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py
index 06e8846a4b..3fc7aedf02 100644
--- a/python/paddle/fluid/tests/unittests/test_scale_op.py
+++ b/python/paddle/fluid/tests/unittests/test_scale_op.py
@@ -255,7 +255,6 @@ class TestScaleDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.scale_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -287,7 +286,6 @@ class TestScaleTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.scale_wrapper, [data], out, x_init=[data_arr], place=place
         )
diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py
index e3b9bc45cf..1cab13d20f 100644
--- a/python/paddle/fluid/tests/unittests/test_set_value_op.py
+++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py
@@ -20,7 +20,6 @@ from functools import reduce
 import numpy as np

 import paddle
-import paddle.fluid as fluid
 from paddle.fluid.layer_helper import LayerHelper


@@ -1028,7 +1027,6 @@ class TestBackward(unittest.TestCase):
         paddle.disable_static()

     def func_test_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         model = Model()
         x = paddle.ones([1, 12, 3, 3]).astype("float32")
         y = paddle.ones([1, 12, 3, 3]).astype("float32")
@@ -1037,7 +1035,6 @@ class TestBackward(unittest.TestCase):

         self.assertTrue(var.grad.shape == x.grad[0, :, 0, 0].shape)
         self.assertTrue((0 == x.grad[0, :, 0, 0]).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


 class TestGradientTruncated(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
index 4620f7ad46..66dd19cb13 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
@@ -25,7 +25,6 @@ from paddle.sparse import nn

 class TestSparseBatchNorm(unittest.TestCase):
     def test(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         paddle.seed(0)
         channels = 4
         shape = [2, 3, 6, 6, channels]
@@ -41,6 +40,7 @@ class TestSparseBatchNorm(unittest.TestCase):
             dense_x2 = copy.deepcopy(dense_x)
             dense_x2.stop_gradient = False
             sparse_x = dense_x2.to_sparse_coo(sparse_dim)
+            sparse_x.retain_grads()
             sparse_batch_norm = paddle.sparse.nn.BatchNorm(channels)
             # set same params
             sparse_batch_norm._mean.set_value(batch_norm._mean)
@@ -64,7 +64,6 @@ class TestSparseBatchNorm(unittest.TestCase):
                 atol=1e-5,
                 rtol=1e-5,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_error_layout(self):
         with self.assertRaises(ValueError):
diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py
index ce21cfa424..9e84268a5f 100644
--- a/python/paddle/fluid/tests/unittests/test_stack_op.py
+++ b/python/paddle/fluid/tests/unittests/test_stack_op.py
@@ -308,13 +308,13 @@ class TestStackOpWithNegativeShape(unittest.TestCase):
 class TestStackAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

         x1 = paddle.rand([])
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
         out = paddle.stack([x1, x2])
+        out.retain_grads()
         out.backward()

         self.assertEqual(out.shape, [2])
diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py
index 4e750d4b86..22b9dc573f 100644
--- a/python/paddle/fluid/tests/unittests/test_sum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sum_op.py
@@ -618,7 +618,6 @@ class TestAddNDoubleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.add_n_wrapper,
             [data1, data2],
@@ -661,7 +660,6 @@ class TestAddNTripleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.add_n_wrapper,
             [data1, data2],
@@ -697,7 +695,6 @@ class TestSumDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sum_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -729,7 +726,6 @@ class TestSumTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.sum_wrapper, [data], out, x_init=[data_arr], place=place
         )
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py
index 37e45e69b9..08d9a6370f 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py
@@ -49,7 +49,6 @@ class TensorFill_Test(unittest.TestCase):
             self.assertEqual((tensor.numpy() == target).all(), True)

     def test_tensor_fill_backward(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         typelist = ['float32']
         places = [fluid.CPUPlace()]
         if fluid.core.is_compiled_with_cuda():
@@ -69,12 +68,12 @@ class TensorFill_Test(unittest.TestCase):
                 tensor = paddle.to_tensor(np_arr, place=p, dtype=dtype)
                 tensor.stop_gradient = False
                 y = tensor * 2
+                y.retain_grads()
                 y.fill_(var)

                 loss = y.sum()
                 loss.backward()
                 self.assertEqual((y.grad.numpy() == 0).all().item(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_errors(self):
         def test_list():
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py
index 0f375cc0ae..8da207f203 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py
@@ -29,7 +29,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.places.append(fluid.CUDAPlace(0))

     def test_dim2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]
         ).astype('float32')
@@ -48,6 +47,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=0, dim1=0, dim2=1)
             loss = ny.sum()
             loss.backward()
@@ -59,10 +59,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim2_offset_1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 2, 2], [1, 2, 2], [2, 1, 2], [2, 2, 1]]
         ).astype('float32')
@@ -81,6 +79,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=-1, dim1=0, dim2=1)
             loss = ny.sum()
             loss.backward()
@@ -92,10 +91,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim2_offset1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]
         ).astype('float32')
@@ -114,6 +111,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=1, dim1=0, dim2=1)
             loss = ny.sum()
             loss.backward()
@@ -125,10 +123,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [
@@ -175,6 +171,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((2, 4, 3, 2), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
             loss = ny.sum()
             loss.backward()
@@ -186,10 +183,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_largedim(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         if len(self.places) > 1:
             bsdim = 1024
             fsdim = 128
@@ -201,6 +196,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype)
             y.stop_gradient = False
             y = y * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
             loss = ny.sum()
             loss.backward()
@@ -212,7 +208,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):

             self.assertEqual((ny == expected_pred).all(), True)
             self.assertEqual((y.grad == expected_grad).all(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py
index d3c5fc15ed..c16c9f23ff 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py
@@ -29,7 +29,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.places.append(fluid.CUDAPlace(0))

     def test_dim2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]
         ).astype('float32')
@@ -48,6 +47,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=0, dim1=0, dim2=1)
             loss = y.sum()
             loss.backward()
@@ -59,10 +59,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim2_offset_1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 2, 2], [1, 2, 2], [2, 1, 2], [2, 2, 1]]
         ).astype('float32')
@@ -81,6 +79,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=-1, dim1=0, dim2=1)
             loss = y.sum()
             loss.backward()
@@ -92,10 +91,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim2_offset1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]
         ).astype('float32')
@@ -114,6 +111,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((4, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=1, dim1=0, dim2=1)
             loss = y.sum()
             loss.backward()
@@ -125,10 +123,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_dim4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [
@@ -175,6 +171,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             x = paddle.ones((2, 4, 3, 2), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=0, dim1=1, dim2=2)
             loss = y.sum()
             loss.backward()
@@ -186,10 +183,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

     def test_largedim(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         # large dim only test on gpu because the cpu version is too slow for ci test, and the memory is limited
         if len(self.places) > 1:
             bsdim = 1024
@@ -202,6 +197,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype)
             y.stop_gradient = False
             y = y * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=0, dim1=1, dim2=2)
             loss = y.sum()
             loss.backward()
@@ -213,7 +209,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):

             self.assertEqual((y == expected_pred).all(), True)
             self.assertEqual((y.grad == expected_grad).all(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py
index a4854477ff..298901ece7 100755
--- a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py
@@ -292,24 +292,26 @@ class TestUnsqueezeInplaceAPI(TestUnsqueezeAPI):
 class TestUnsqueezeAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

         x = paddle.rand([])
         x.stop_gradient = False

         out = paddle.unsqueeze(x, [-1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [1])

         out = paddle.unsqueeze(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1, 1])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [1, 1])

         out = paddle.unsqueeze(x, [0, 1, 2])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1, 1, 1])
         self.assertEqual(x.grad.shape, [])
-- 
GitLab