Unverified commit 3f2f036c, authored by 姜永久, committed by GitHub

Remove retain-grads flag for tests, part 1 (#49660)

* Remove retain-grads flag for tests

* Modify fill_diagonal

* Retain grads for fill_diagonal tests

* Reset sum & concat

* Fix fill_diagonal
Parent 1712e212
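The diff below applies the same mechanical change across many test files. As a rough illustration (not part of the commit; the tensor names and shapes here are made up), the tests move from flipping the process-wide `FLAGS_retain_grad_for_all_tensor` flag to calling `retain_grads()` on the one non-leaf tensor whose gradient an assertion inspects, so the global flag no longer leaks into unrelated tests:

```python
import paddle

x = paddle.ones([2, 3], dtype="float32")
x.stop_gradient = False

# Old pattern, removed by this PR: a global flag kept gradients for every
# intermediate tensor during backward and had to be reset afterwards.
#   fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
#   ... forward / backward / assertions ...
#   fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

# New pattern: keep the gradient only for the tensor under test.
y = x * 2          # y is a non-leaf tensor; its grad is discarded by default
y.retain_grads()   # explicitly ask autograd to keep y.grad after backward
loss = y.sum()
loss.backward()

assert y.grad is not None           # available only because of retain_grads()
assert (y.grad.numpy() == 1).all()  # d(sum)/dy is all ones
```

In the gradient_checker-based tests the flag is simply dropped with no replacement, which suggests the dygraph grad checkers no longer depend on it; `retain_grads()` is added only where a test reads the gradient of an intermediate tensor directly.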
@@ -66,11 +66,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -521,11 +519,9 @@ class TestPowDoubleGradCheck1(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -552,11 +548,9 @@ class TestPowDoubleGradCheck2(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -584,11 +578,9 @@ class TestSinTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.sin_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -615,11 +607,9 @@ class TestPowTripleGradCheck1(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -646,11 +636,9 @@ class TestPowTripleGradCheck2(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -677,11 +665,9 @@ class TestPowTripleGradCheck3(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.pow_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
@@ -709,11 +695,9 @@ class TestCosTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.cos_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_grad(self):
         paddle.enable_static()
......
@@ -471,7 +471,6 @@ class TestConcatDoubleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.concat_wrapper,
             [data1, data2],
@@ -513,7 +512,6 @@ class TestConcatTripleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.concat_wrapper,
             [data1, data2],
......
@@ -462,7 +462,6 @@ class TestCumsumDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.cumsum_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -494,7 +493,6 @@ class TestCumsumTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.cumsum_wrapper, [data], out, x_init=[data_arr], place=place
         )
......
@@ -229,7 +229,6 @@ class TestElementwisePowGradOpInt(unittest.TestCase):
         ).astype("int")
 
     def test_grad(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if fluid.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -240,11 +239,11 @@ class TestElementwisePowGradOpInt(unittest.TestCase):
                 x.stop_gradient = False
                 y.stop_gradient = False
                 res = x**y
+                res.retain_grads()
                 res.backward()
                 np.testing.assert_array_equal(res.gradient(), self.grad_res)
                 np.testing.assert_array_equal(x.gradient(), self.grad_x)
                 np.testing.assert_array_equal(y.gradient(), self.grad_y)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
 class TestElementwisePowOpFP16(OpTest):
......
@@ -193,7 +193,6 @@ class TestImperativeAutoPrune(unittest.TestCase):
     # TODO(jiabin): Support this when we support better split tensor
     def test_auto_prune3(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case3 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -201,13 +200,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case3(v1, v2, 1)
+            part2.retain_grads()
             loss.backward()
             self.assertIsNotNone(case3.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 0).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_auto_prune4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case4 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -215,13 +213,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case4(v1, v2, 1)
+            part2.retain_grads()
             part2.backward()
             self.assertIsNotNone(case4.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 1).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_auto_prune5(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             case4 = AutoPruneLayer3(input_size=784)
             value1 = np.arange(784).reshape(1, 784).astype("float32")
@@ -229,10 +226,10 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v1 = fluid.dygraph.to_variable(value1)
             v2 = fluid.dygraph.to_variable(value2)
             loss, part1, part2 = case4(v1, v2, 2)
+            part2.retain_grads()
             part1.backward()
             self.assertIsNotNone(case4.linear.weight._grad_ivar())
             self.assertTrue((part2.gradient() == 0).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_auto_prune6(self):
         with fluid.dygraph.guard():
......
@@ -40,7 +40,6 @@ class SimpleNet(paddle.nn.Layer):
 
 class TestSimpleNet(unittest.TestCase):
     def test_selectedrows_gradient1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -63,6 +62,7 @@ class TestSimpleNet(unittest.TestCase):
                     parameter_list=simplenet.parameters(),
                 )  # grad_clip=grad_clip
                 input_emb, emb = simplenet(input)
+                input_emb.retain_grads()
                 self.assertIsNone(emb.weight.gradient())
                 self.assertIsNone(input_emb.gradient())
@@ -77,10 +77,8 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb.clear_gradient()
                 self.assertIsNotNone(input_emb.gradient())
                 paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_selectedrows_gradient2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
@@ -103,6 +101,7 @@ class TestSimpleNet(unittest.TestCase):
                     grad_clip=grad_clip,
                 )
                 input_emb, emb = simplenet(input)
+                input_emb.retain_grads()
                 self.assertIsNone(emb.weight.gradient())
                 self.assertIsNone(input_emb.gradient())
@@ -116,7 +115,6 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb.clear_gradient()
                 self.assertIsNotNone(input_emb.gradient())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
 if __name__ == '__main__':
......
@@ -255,7 +255,6 @@ class TestScaleDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.scale_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -287,7 +286,6 @@ class TestScaleTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.scale_wrapper, [data], out, x_init=[data_arr], place=place
         )
......
@@ -20,7 +20,6 @@ from functools import reduce
 import numpy as np
 
 import paddle
-import paddle.fluid as fluid
 from paddle.fluid.layer_helper import LayerHelper
@@ -1028,7 +1027,6 @@ class TestBackward(unittest.TestCase):
         paddle.disable_static()
 
     def func_test_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         model = Model()
         x = paddle.ones([1, 12, 3, 3]).astype("float32")
         y = paddle.ones([1, 12, 3, 3]).astype("float32")
@@ -1037,7 +1035,6 @@ class TestBackward(unittest.TestCase):
         self.assertTrue(var.grad.shape == x.grad[0, :, 0, 0].shape)
         self.assertTrue((0 == x.grad[0, :, 0, 0]).all())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
 class TestGradientTruncated(unittest.TestCase):
......
@@ -25,7 +25,6 @@ from paddle.sparse import nn
 
 class TestSparseBatchNorm(unittest.TestCase):
     def test(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         paddle.seed(0)
         channels = 4
         shape = [2, 3, 6, 6, channels]
@@ -41,6 +40,7 @@ class TestSparseBatchNorm(unittest.TestCase):
         dense_x2 = copy.deepcopy(dense_x)
         dense_x2.stop_gradient = False
         sparse_x = dense_x2.to_sparse_coo(sparse_dim)
+        sparse_x.retain_grads()
         sparse_batch_norm = paddle.sparse.nn.BatchNorm(channels)
         # set same params
         sparse_batch_norm._mean.set_value(batch_norm._mean)
@@ -64,7 +64,6 @@ class TestSparseBatchNorm(unittest.TestCase):
             atol=1e-5,
             rtol=1e-5,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_error_layout(self):
         with self.assertRaises(ValueError):
......
@@ -308,13 +308,13 @@ class TestStackOpWithNegativeShape(unittest.TestCase):
 
 class TestStackAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x1 = paddle.rand([])
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
         out = paddle.stack([x1, x2])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [2])
......
@@ -618,7 +618,6 @@ class TestAddNDoubleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.add_n_wrapper,
             [data1, data2],
@@ -661,7 +660,6 @@ class TestAddNTripleGradCheck(unittest.TestCase):
             place=place,
             eps=eps,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.add_n_wrapper,
             [data1, data2],
@@ -697,7 +695,6 @@ class TestSumDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sum_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -729,7 +726,6 @@ class TestSumTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.sum_wrapper, [data], out, x_init=[data_arr], place=place
         )
......
@@ -49,7 +49,6 @@ class TensorFill_Test(unittest.TestCase):
                 self.assertEqual((tensor.numpy() == target).all(), True)
 
     def test_tensor_fill_backward(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         typelist = ['float32']
         places = [fluid.CPUPlace()]
         if fluid.core.is_compiled_with_cuda():
@@ -69,12 +68,12 @@ class TensorFill_Test(unittest.TestCase):
                 tensor = paddle.to_tensor(np_arr, place=p, dtype=dtype)
                 tensor.stop_gradient = False
                 y = tensor * 2
+                y.retain_grads()
                 y.fill_(var)
                 loss = y.sum()
                 loss.backward()
                 self.assertEqual((y.grad.numpy() == 0).all().item(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_errors(self):
         def test_list():
......
@@ -29,7 +29,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.places.append(fluid.CUDAPlace(0))
 
     def test_dim2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]
         ).astype('float32')
@@ -48,6 +47,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 ny = y.fill_diagonal_tensor(v, offset=0, dim1=0, dim2=1)
                 loss = ny.sum()
                 loss.backward()
@@ -59,10 +59,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim2_offset_1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 2, 2], [1, 2, 2], [2, 1, 2], [2, 2, 1]]
         ).astype('float32')
@@ -81,6 +79,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 ny = y.fill_diagonal_tensor(v, offset=-1, dim1=0, dim2=1)
                 loss = ny.sum()
                 loss.backward()
@@ -92,10 +91,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim2_offset1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]
         ).astype('float32')
@@ -114,6 +111,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 ny = y.fill_diagonal_tensor(v, offset=1, dim1=0, dim2=1)
                 loss = ny.sum()
                 loss.backward()
@@ -125,10 +123,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [
@@ -175,6 +171,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((2, 4, 3, 2), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
                 loss = ny.sum()
                 loss.backward()
@@ -186,10 +183,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_largedim(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         if len(self.places) > 1:
             bsdim = 1024
             fsdim = 128
@@ -201,6 +196,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype)
             y.stop_gradient = False
             y = y * 2
+            y.retain_grads()
             ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
             loss = ny.sum()
             loss.backward()
@@ -212,7 +208,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.assertEqual((ny == expected_pred).all(), True)
             self.assertEqual((y.grad == expected_grad).all(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
 if __name__ == '__main__':
......
@@ -29,7 +29,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.places.append(fluid.CUDAPlace(0))
 
     def test_dim2(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]
         ).astype('float32')
@@ -48,6 +47,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_tensor_(v, offset=0, dim1=0, dim2=1)
                 loss = y.sum()
                 loss.backward()
@@ -59,10 +59,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim2_offset_1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 2, 2], [1, 2, 2], [2, 1, 2], [2, 2, 1]]
         ).astype('float32')
@@ -81,6 +79,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_tensor_(v, offset=-1, dim1=0, dim2=1)
                 loss = y.sum()
                 loss.backward()
@@ -92,10 +91,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim2_offset1(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]
         ).astype('float32')
@@ -114,6 +111,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((4, 3), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_tensor_(v, offset=1, dim1=0, dim2=1)
                 loss = y.sum()
                 loss.backward()
@@ -125,10 +123,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_dim4(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [
@@ -175,6 +171,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                 x = paddle.ones((2, 4, 3, 2), dtype=dtype)
                 x.stop_gradient = False
                 y = x * 2
+                y.retain_grads()
                 y.fill_diagonal_tensor_(v, offset=0, dim1=1, dim2=2)
                 loss = y.sum()
                 loss.backward()
@@ -186,10 +183,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
                     (y.grad.numpy().astype('float32') == expected_grad).all(),
                     True,
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_largedim(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         # large dim only test on gpu because the cpu version is too slow for ci test, and the memory is limited
         if len(self.places) > 1:
             bsdim = 1024
@@ -202,6 +197,7 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype)
             y.stop_gradient = False
             y = y * 2
+            y.retain_grads()
             y.fill_diagonal_tensor_(v, offset=0, dim1=1, dim2=2)
             loss = y.sum()
             loss.backward()
@@ -213,7 +209,6 @@ class TensorFillDiagTensor_Test(unittest.TestCase):
             self.assertEqual((y == expected_pred).all(), True)
             self.assertEqual((y.grad == expected_grad).all(), True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
 if __name__ == '__main__':
......
@@ -292,24 +292,26 @@ class TestUnsqueezeInplaceAPI(TestUnsqueezeAPI):
 
 class TestUnsqueezeAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
 
         out = paddle.unsqueeze(x, [-1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [1])
 
         out = paddle.unsqueeze(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1, 1])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [1, 1])
 
         out = paddle.unsqueeze(x, [0, 1, 2])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [1, 1, 1])
         self.assertEqual(x.grad.shape, [])
......