Unverified commit a504508c, authored by 姜永久, committed by GitHub

rm retain_grad_flag for tests part0 (#49655)

* rm retain_grad_flag for tests

* modify transpose op

* retain grads for xpu tests

* lint

* modify xpu test
Parent commit: 0cae5c7f
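The pattern applied throughout this commit: instead of flipping the process-wide `FLAGS_retain_grad_for_all_tensor` flag around each test, the tests call `retain_grads()` only on the specific non-leaf tensors whose gradients they inspect. A minimal sketch of that pattern (not part of this diff, assuming Paddle's dygraph/eager mode):

```python
import paddle

# Leaf tensor: its gradient is kept after backward() by default.
x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)

# Old style removed by this commit: a global flag retained every tensor's grad.
# paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

y = x * 2          # non-leaf (intermediate) tensor
y.retain_grads()   # new style: opt in per tensor, only where the test needs y.grad

loss = y.sum()
loss.backward()

print(y.gradient())  # available only because retain_grads() was called on y
print(x.gradient())  # leaf gradients are retained regardless
```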
@@ -22,7 +22,6 @@ import utils
 from utils import matmul, mul, nested, o2, reduce, reduce_dim
 import paddle
-import paddle.fluid as fluid
 import paddle.nn.functional as F
 from paddle.incubate.autograd.utils import as_tensors
@@ -553,8 +552,6 @@ class TestHessianNoBatch(unittest.TestCase):
         )
     def func_create_graph_true(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def func(x):
             return paddle.sum(F.sigmoid(x))
@@ -568,7 +565,6 @@ class TestHessianNoBatch(unittest.TestCase):
         np.testing.assert_allclose(
             hessian[:].numpy(), numerical_hessian, self.rtol, self.atol
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def func_out_not_single(self):
         def func(x):
...
@@ -150,7 +150,6 @@ class TestCastDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.cast_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -182,7 +181,6 @@ class TestCastTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.cast_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -106,6 +106,7 @@ class Test_Detach(unittest.TestCase):
         )
         data = to_variable(data)
         x = linear(data)
+        x.retain_grads()
         x1 = linear1(x)
         loss = x1
         # print(loss, loss.shape)
@@ -153,6 +154,7 @@ class Test_Detach(unittest.TestCase):
         )
         data = to_variable(data)
         x = linear(data)
+        x.retain_grads()
         x_detach = x.detach()
         x1 = linear1(x)
         x2 = linear2(x_detach)
@@ -162,12 +164,10 @@ class Test_Detach(unittest.TestCase):
         return x.gradient()
     def test_NoDetachMulti_DetachMulti(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         array_no_detach_multi = self.no_detach_multi()
         array_detach_multi = self.detach_multi()
         assert not np.array_equal(array_no_detach_multi, array_detach_multi)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_NoDetachSingle_DetachMulti(self):
         array_no_detach_single = self.no_detach_single()
...
@@ -359,7 +359,6 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.multiply_wrapper,
             [x, y],
@@ -367,7 +366,6 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase):
             x_init=[x_arr, y_arr],
             place=place,
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
...
@@ -281,7 +281,6 @@ class TestExpandDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.expand_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -313,7 +312,6 @@ class TestExpandTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.expand_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -40,7 +40,6 @@ class TestRecurrentFeed(unittest.TestCase):
         original_np1 = np.arange(1, 5).reshape(2, 2).astype("float32")
         original_np2 = np.arange(5, 9).reshape(2, 2).astype("float32")
         with fluid.dygraph.guard():
-            fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
             original_in1 = to_variable(original_np1)
@@ -51,16 +50,15 @@ class TestRecurrentFeed(unittest.TestCase):
             for i in range(3):
                 sum_out, out = rt(original_in1, original_in2)
+                out.retain_grads()
                 original_in1 = out
                 sum_out_value = sum_out.numpy()
                 sum_out.backward()
                 dyout = out.gradient()
                 original_in1.stop_gradient = True
                 rt.clear_gradients()
-            fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         with fluid.dygraph.guard():
-            fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
             original_in1 = to_variable(original_np1)
@@ -71,13 +69,13 @@ class TestRecurrentFeed(unittest.TestCase):
             for i in range(3):
                 sum_out, out = rt(original_in1, original_in2)
+                out.retain_grads()
                 original_in1 = out
                 eager_sum_out_value = sum_out.numpy()
                 sum_out.backward()
                 eager_dyout = out.gradient()
                 original_in1.stop_gradient = True
                 rt.clear_gradients()
-            fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         with new_program_scope():
             fluid.default_startup_program().random_seed = seed
...
@@ -450,7 +450,6 @@ class TestMeanDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.mean_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -482,7 +481,6 @@ class TestMeanTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.mean_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -110,7 +110,6 @@ class TestSignDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sign_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -142,7 +141,6 @@ class TestSignTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.sign_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -491,11 +491,12 @@ class TestSoftmaxAPI(unittest.TestCase):
 class TestSoftmaxAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.nn.functional.softmax(x)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.shape, [])
         self.assertEqual(x.grad.shape, [])
...
@@ -236,7 +236,6 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.squeeze_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -268,7 +267,6 @@ class TestSqueezeTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.squeeze_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -66,8 +66,6 @@ class TestTensorRegisterHook(unittest.TestCase):
             self.devices.append("gpu")
     def test_hook_for_interior_var(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_double_hook_for_interior_var(double_hook, removed=False):
             for device in self.devices:
                 paddle.set_device(device)
@@ -79,6 +77,7 @@ class TestTensorRegisterHook(unittest.TestCase):
                 w = x + y
                 w.stop_gradient = False
+                w.retain_grads()
                 helper = w.register_hook(double_hook)
                 z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
@@ -115,6 +114,7 @@ class TestTensorRegisterHook(unittest.TestCase):
                 w = x + y
                 w.stop_gradient = False
+                w.retain_grads()
                 helper = w.register_hook(print_hook)
                 z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
@@ -156,11 +156,8 @@ class TestTensorRegisterHook(unittest.TestCase):
         run_print_hook_for_interior_var(print_hook)
         # register hook and removed
         run_print_hook_for_interior_var(print_hook, removed=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_hook_for_leaf_var(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_double_hook_for_leaf_var(double_hook, removed=False):
             for device in self.devices:
                 paddle.set_device(device)
@@ -173,6 +170,7 @@ class TestTensorRegisterHook(unittest.TestCase):
                 w = x + y
                 w.stop_gradient = False
+                w.retain_grads()
                 z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
                 z.stop_gradient = False
@@ -198,11 +196,8 @@ class TestTensorRegisterHook(unittest.TestCase):
         run_double_hook_for_leaf_var(lambda grad: grad * 2)
         # register hook and removed
         run_double_hook_for_leaf_var(lambda grad: grad * 2, removed=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_hook_for_accumulated_grad_interior_var(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_double_hook_for_accumulated_grad_interior_var(
             double_hook, removed=False
         ):
@@ -213,11 +208,14 @@ class TestTensorRegisterHook(unittest.TestCase):
                 b = paddle.to_tensor([0.0, 0.0, 1.0, 2.0])
                 a.stop_gradient = False
                 b.stop_gradient = False
+                a.retain_grads()
+                b.retain_grads()
                 helper1 = a.register_hook(double_hook)
                 x = a + b
                 x.stop_gradient = False
+                x.retain_grads()
                 helper2 = x.register_hook(double_hook)
@@ -258,11 +256,8 @@ class TestTensorRegisterHook(unittest.TestCase):
         run_double_hook_for_accumulated_grad_interior_var(
             lambda grad: grad * 2, removed=True
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_hook_for_accumulated_grad_leaf_var(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_double_hook_for_accumulated_grad_leaf_var(
             double_hook, removed=False
         ):
@@ -304,11 +299,8 @@ class TestTensorRegisterHook(unittest.TestCase):
         run_double_hook_for_accumulated_grad_leaf_var(
             lambda grad: grad * 2, removed=True
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_hook_in_model(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_double_hook_in_model(
             data, label, hook=None, register=False, remove=False
         ):
@@ -321,8 +313,10 @@ class TestTensorRegisterHook(unittest.TestCase):
             data = paddle.to_tensor(data)
             label = paddle.to_tensor(label)
+            data.retain_grads()
             ret1, out = net(data, hook, register, remove)
+            ret1.retain_grads()
             loss = loss_fn(out, label)
             loss.backward()
@@ -357,7 +351,7 @@ class TestTensorRegisterHook(unittest.TestCase):
         )
         # compare original value and with hook
-        np.testing.assert_array_equal(ret1_grad, ret1_grad_hook)
+        np.testing.assert_array_equal(ret1_grad * 2, ret1_grad_hook)
         np.testing.assert_array_equal(linear1_w_grad * 2, linear1_w_grad_hook)
         np.testing.assert_array_equal(linear1_b_grad * 2, linear1_b_grad_hook)
@@ -365,11 +359,8 @@ class TestTensorRegisterHook(unittest.TestCase):
         np.testing.assert_array_equal(ret1_grad, ret1_grad_rm)
         np.testing.assert_array_equal(linear1_w_grad, linear1_w_grad_rm)
         np.testing.assert_array_equal(linear1_b_grad, linear1_b_grad_rm)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_multiple_hooks_for_interior_var(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def run_multiple_hooks_for_interior_var(
             device, hooks, remove1=False, remove2=False, remove3=False
         ):
@@ -380,7 +371,11 @@ class TestTensorRegisterHook(unittest.TestCase):
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             w = x + y
+            w.retain_grads()
             w.stop_gradient = False
             helpers = []
@@ -449,7 +444,6 @@ class TestTensorRegisterHook(unittest.TestCase):
         np.testing.assert_array_equal(w_grad, z)
         np.testing.assert_array_equal(x_grad, z)
         np.testing.assert_array_equal(y_grad, z)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_hook_in_double_grad(self):
         def double_print_hook(grad):
...
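For context, a minimal sketch (not part of this diff, assuming Paddle's eager mode) of how a tensor hook interacts with a per-tensor `retain_grads()` call on an interior variable, mirroring the pattern the hook tests above now use; the tensor values and the doubling hook here are illustrative only:

```python
import numpy as np
import paddle

x = paddle.to_tensor([0.0, 1.0, 2.0, 4.0], stop_gradient=False)
y = paddle.to_tensor([1.0, 2.0, 3.0, 4.0], stop_gradient=False)

w = x + y          # interior (non-leaf) tensor
w.retain_grads()   # keep w.grad after backward, as the updated tests do
helper = w.register_hook(lambda grad: grad * 2)  # double the grad flowing through w

z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
(w * z).sum().backward()

# The hook scales the gradient propagated through w, so the leaf grads are doubled.
np.testing.assert_array_equal(x.gradient(), (z * 2).numpy())
print(w.gradient())  # inspectable only because retain_grads() was called on w

helper.remove()      # hooks can be removed, as the removed=True cases exercise
```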
@@ -528,7 +528,6 @@ class TestTransposeDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.transpose_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -560,7 +559,6 @@ class TestTransposeTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.transpose_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -577,11 +575,11 @@ class TestTransposeTripleGradCheck(unittest.TestCase):
 class TestTransposeAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.transpose(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
...
@@ -161,12 +161,11 @@ class TestUniformRandomInplaceGrad(unittest.TestCase):
         self.shape = (1000, 784)
     def run_(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         def test_grad():
             tensor_a = paddle.ones(self.shape)
             tensor_a.stop_gradient = False
             tensor_b = tensor_a * 0.5
+            tensor_b.retain_grads()
             tensor_b.uniform_(min=-2, max=2)
             loss = tensor_b.sum()
             loss.backward()
@@ -179,7 +178,6 @@ class TestUniformRandomInplaceGrad(unittest.TestCase):
         for place in places:
             paddle.set_device(place)
             test_grad()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_uniform_random_inplace_grad(self):
         self.run_()
...
@@ -337,7 +337,6 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.unsqueeze_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -369,7 +368,6 @@ class TestUnsqueezeTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.unsqueeze_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -17,12 +17,10 @@ import unittest
 import numpy as np
 import paddle
-import paddle.fluid as fluid
 import paddle.nn.functional as F
 paddle.set_device('xpu')
-fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
 unary_api_list = [
     paddle.nn.functional.elu,
@@ -102,7 +100,9 @@ class TestUnaryAPI(unittest.TestCase):
         for api in unary_api_list:
             x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             out = api(x)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -149,7 +149,9 @@ class TestReduceAPI(unittest.TestCase):
             else:
                 x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             out = api(x, None)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -199,12 +201,15 @@ class TestBinaryAPI(unittest.TestCase):
             y = paddle.rand([])
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             if isinstance(api, dict):
                 out = api['func'](x, y)
                 out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             self.assertEqual(out.shape, [])
             out.backward()
@@ -224,6 +229,7 @@ class TestBinaryAPI(unittest.TestCase):
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             self.assertEqual(out.shape, [2, 3, 4])
             out.backward()
@@ -243,6 +249,7 @@ class TestBinaryAPI(unittest.TestCase):
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             self.assertEqual(out.shape, [2, 3, 4])
             out.backward()
@@ -367,7 +374,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_pow_factor(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.pow(x, 2.0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -377,7 +386,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_cast(self):
         x = paddle.full([], 1.0, 'float32')
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.cast(x, 'int32')
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -388,6 +399,7 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.uniform([], None, -10, 10)
         x.stop_gradient = False
         out = paddle.clip(x, -5, 5)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -432,6 +444,7 @@ class TestSundryAPI(unittest.TestCase):
         x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
         index = paddle.full([], 2, 'int64')
         out = paddle.gather(x, index)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -444,6 +457,7 @@ class TestSundryAPI(unittest.TestCase):
         )
         index = paddle.full([], 1, 'int64')
         out = paddle.gather(x, index)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [3])
@@ -486,10 +500,18 @@ class TestSundryAPI(unittest.TestCase):
         x2.stop_gradient = False
         x3.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
+        x3.retain_grads()
         out1 = paddle.diagflat(x1, 1)
         out2 = paddle.diagflat(x2, -1)
         out3 = paddle.diagflat(x3, 0)
+        out1.retain_grads()
+        out2.retain_grads()
+        out3.retain_grads()
         out1.backward()
         out2.backward()
         out3.backward()
@@ -539,7 +561,9 @@ class TestSundryAPI(unittest.TestCase):
     def test_scale(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.scale(x, scale=2.0, bias=1.0)
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
@@ -574,26 +598,31 @@ class TestSundryAPI(unittest.TestCase):
     def test_reshape_list(self):
         x = paddle.rand([])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [])
         self.assertEqual(out.grad.shape, [])
         out = paddle.reshape(x, [1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1, 1])
@@ -602,8 +631,10 @@ class TestSundryAPI(unittest.TestCase):
     def test_reshape_tensor(self):
         x = paddle.rand([1, 1])
         x.stop_gradient = False
+        x.retain_grads()
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [])
@@ -611,6 +642,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = paddle.full([], 1, "int32")
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1])
@@ -618,6 +650,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = paddle.full([], -1, "int32")
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1])
@@ -625,6 +658,7 @@ class TestSundryAPI(unittest.TestCase):
         new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")]
         out = paddle.reshape(x, new_shape)
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1, 1])
         self.assertEqual(out.shape, [1, 1])
@@ -666,8 +700,13 @@ class TestSundryAPI(unittest.TestCase):
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
         out1 = paddle.sort(x1, axis=-1)
         out2 = paddle.sort(x2, axis=0)
+        out1.retain_grads()
+        out2.retain_grads()
         out1.backward()
         out2.backward()
@@ -688,8 +727,13 @@ class TestSundryAPI(unittest.TestCase):
         x2 = paddle.rand([])
         x1.stop_gradient = False
         x2.stop_gradient = False
+        x1.retain_grads()
+        x2.retain_grads()
         out1 = paddle.argsort(x1, axis=-1)
         out2 = paddle.argsort(x2, axis=0)
+        out1.retain_grads()
+        out2.retain_grads()
         out1.backward()
         out2.backward()
...