Unverified commit 73f97de0, authored by 姜永久, committed by GitHub

rm flag retain grad (#49835)

* rm retain grad

* fix zero_dim

* fix zero_dim for xpu

* reset zero dim for xpu

* reset xpu

* reset custom_relu

* Reset flip

* fix zero dim
Parent 60ee518a
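The diff below replaces the global `FLAGS_retain_grad_for_all_tensor` toggle with explicit `retain_grads()` calls on just the tensors whose gradients each test inspects, so only those tensors keep a `.grad` after `backward()`. A minimal sketch of that pattern, assuming Paddle's eager (dygraph) mode and using illustrative tensor names:

```python
import paddle

# Before this commit, tests globally enabled
# fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}).
# The new pattern retains grads explicitly, per tensor:
x = paddle.rand([4, 8])
x.stop_gradient = False
x.retain_grads()                      # keep x.grad after backward()

out = paddle.nn.functional.relu(x)    # non-leaf result
out.retain_grads()                    # without this, out.grad stays None

out.backward()
assert x.grad is not None
assert out.grad is not None
```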
......@@ -21,7 +21,6 @@ import numpy as np
import paddle
import paddle.static as static
from paddle import fluid
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.vision.transforms import Compose, Normalize
......@@ -146,8 +145,10 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
paddle.set_device(device)
t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
t.retain_grads()
out = func(t) if use_func else paddle.nn.functional.relu(t)
out.retain_grads()
dx = paddle.grad(
outputs=out,
inputs=t,
......@@ -259,7 +260,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
)
def test_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
for device in self.devices:
for dtype in self.dtypes:
if device == 'cpu' and dtype == 'float16':
......@@ -286,7 +286,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
x_grad, pd_x_grad
),
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_static_save_and_load_inference_model(self):
paddle.enable_static()
......@@ -354,7 +353,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
paddle.disable_static()
def test_double_grad_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
for device in self.devices:
for dtype in self.dtypes:
if device == 'cpu' and dtype == 'float16':
......@@ -380,7 +378,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
dx_grad, pd_dx_grad
),
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_with_dataloader(self):
for device in self.devices:
......
......@@ -30,8 +30,10 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
t = paddle.to_tensor(np_x, dtype=dtype)
t.stop_gradient = False
t.retain_grads()
out = func(t) if use_func else paddle.nn.functional.relu(t)
out.retain_grads()
out.stop_gradient = False
out.backward()
......@@ -142,14 +144,14 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
import paddle.fluid as fluid
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
paddle.set_device(device)
t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
t.retain_grads()
out = func(t) if use_func else paddle.nn.functional.relu(t)
out.retain_grads()
dx = paddle.grad(
outputs=out,
inputs=t,
......@@ -164,7 +166,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
grad_outputs=paddle.ones_like(t),
create_graph=False,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
assert ddout[0].numpy() is not None
return dx[0].numpy(), ddout[0].numpy()
......
......@@ -19,7 +19,6 @@ import numpy as np
from utils import extra_cc_args, extra_nvcc_args, paddle_includes
import paddle
import paddle.fluid as fluid
from paddle.utils.cpp_extension import get_build_directory, load
from paddle.utils.cpp_extension.extension_utils import run_cmd
......@@ -41,24 +40,25 @@ custom_ops = load(
def custom_tanh_double_grad_dynamic(func, device, dtype, np_x):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
paddle.set_device(device)
t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
t.retain_grads()
out = func(t)
out.stop_gradient = False
out.retain_grads()
dx = paddle.grad(
outputs=[out], inputs=[t], create_graph=True, retain_graph=True
)
dx[0].retain_grads()
dx[0].backward()
assert out.grad is not None
assert dx[0].grad is not None
return dx[0].numpy(), dx[0].grad.numpy(), out.grad.numpy()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
class TestCustomTanhDoubleGradJit(unittest.TestCase):
......@@ -68,7 +68,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
self.devices = ['cpu']
def test_double_grad_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
......@@ -102,7 +101,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
dout, pd_dout
),
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
if __name__ == "__main__":
......
......@@ -24,11 +24,11 @@ import numpy as np
def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
import paddle
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
paddle.set_device(device)
t = paddle.to_tensor(np_x, dtype=dtype)
t.stop_gradient = False
t.retain_grads()
sys.stdout.flush()
out = func(t) if use_func else paddle.nn.functional.relu(t)
......@@ -36,7 +36,6 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
out.backward()
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
if t.grad is None:
return out.numpy(), t.grad
else:
......@@ -105,11 +104,12 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
import paddle
paddle.set_device(device)
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
t.retain_grads()
out = func(t) if use_func else paddle.nn.functional.relu(t)
out.retain_grads()
dx = paddle.grad(
outputs=out,
inputs=t,
......@@ -125,7 +125,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
create_graph=False,
)
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
assert ddout[0].numpy() is not None
return dx[0].numpy(), ddout[0].numpy()
......
......@@ -30,8 +30,6 @@ class TestCollectiveAllToAllSingle(unittest.TestCase):
paddle.distributed.is_initialized()
), "The distributed environment has been initialized."
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
def test_collective_alltoall_single(self):
rank = dist.get_rank()
size = dist.get_world_size()
......
......@@ -23,7 +23,6 @@ import paddle.distributed as dist
class TestCollectiveBatchIsendIrecv(unittest.TestCase):
def setUp(self):
dist.init_parallel_env()
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
def test_collective_batch_isend_irecv(self):
rank = dist.get_rank()
......
......@@ -24,7 +24,6 @@ from paddle.distributed.communication.reduce_scatter import _reduce_scatter_base
class TestCollectiveReduceScatter(unittest.TestCase):
def setUp(self):
dist.init_parallel_env()
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
def test_collective_reduce_scatter_sum(self):
rank = dist.get_rank()
......
......@@ -34,7 +34,6 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
def setUp(self):
strategy = fleet.DistributedStrategy()
fleet.init(is_collective=True, strategy=strategy)
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
def test_parallel_margin_softmax_cross_entropy(self):
margin1s = [1.0, 1.0, 1.35]
......@@ -93,6 +92,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
norm_weight = paddle.divide(weight, weight_l2)
data = paddle.matmul(norm_input, norm_weight)
data.retain_grads()
data.stop_gradient = False
sta = (
......@@ -118,6 +118,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
group=check_group,
)
integral_data = integral_data.detach().clone()
integral_data.retain_grads()
integral_data.stop_gradient = False
# add arcface margin to logit
......
......@@ -96,11 +96,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
gradient_checker.triple_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -128,11 +126,9 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -160,11 +156,9 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.abs_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -256,11 +250,9 @@ class TestELUDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.elu_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -292,11 +284,9 @@ class TestCELUDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.celu_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -390,11 +380,9 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.square_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -424,11 +412,9 @@ class TestLogDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.log_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -456,11 +442,9 @@ class TestSinDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.sin_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......@@ -488,11 +472,9 @@ class TestCosDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.cos_wrapper, [x], y, x_init=x_arr, place=place
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_grad(self):
paddle.enable_static()
......
......@@ -37,16 +37,12 @@ class TestAssignOp(op_test.OpTest):
def test_forward(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_output(check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.disable_static()
def test_backward(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_grad(['X'], 'Out', check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.disable_static()
......@@ -60,23 +56,18 @@ class TestAssignFP16Op(op_test.OpTest):
def test_forward(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_output(check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.disable_static()
def test_backward(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_grad(['X'], 'Out', check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.disable_static()
class TestAssignOpWithLoDTensorArray(unittest.TestCase):
def test_assign_LoDTensorArray(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
main_program = Program()
startup_program = Program()
with program_guard(main_program):
......@@ -92,7 +83,6 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase):
sums = paddle.tensor.array_read(array=init_array, i=i)
mean = paddle.mean(sums)
append_backward(mean)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
place = (
fluid.CUDAPlace(0)
......@@ -207,12 +197,13 @@ class TestAssignOApi(unittest.TestCase):
np.testing.assert_allclose(result3.numpy(), np.array([1]), rtol=1e-05)
def test_clone(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.python_api = paddle.clone
x = paddle.ones([2])
x.stop_gradient = False
x.retain_grads()
clone_x = paddle.clone(x)
clone_x.retain_grads()
y = clone_x**3
y.backward()
......@@ -220,7 +211,6 @@ class TestAssignOApi(unittest.TestCase):
np.testing.assert_array_equal(x, [1, 1])
np.testing.assert_array_equal(clone_x.grad.numpy(), [3, 3])
np.testing.assert_array_equal(x.grad.numpy(), [3, 3])
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.enable_static()
with program_guard(Program(), Program()):
......@@ -241,7 +231,6 @@ class TestAssignOApi(unittest.TestCase):
class TestAssignOpErrorApi(unittest.TestCase):
def test_errors(self):
paddle.enable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with program_guard(Program(), Program()):
# The type of input must be Variable or numpy.ndarray.
x1 = fluid.create_lod_tensor(
......@@ -251,7 +240,6 @@ class TestAssignOpErrorApi(unittest.TestCase):
# When the type of input is numpy.ndarray, the dtype of input must be float32, int32.
x2 = np.array([[2.5, 2.5]], dtype='uint8')
self.assertRaises(TypeError, paddle.assign, x2)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.disable_static()
def test_type_error(self):
......@@ -281,7 +269,6 @@ class TestAssignDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.assign_wrapper, [data], out, x_init=[data_arr], place=place
)
......@@ -313,7 +300,6 @@ class TestAssignTripleGradCheck(unittest.TestCase):
gradient_checker.triple_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.assign_wrapper, [data], out, x_init=[data_arr], place=place
)
......
......@@ -166,6 +166,7 @@ class TestDygraphTripleGrad(TestCase):
@dygraph_guard
def func_example_with_gradient_and_create_graph(self):
x = random_var(self.shape)
x.retain_grads()
x_np = x.numpy()
x.stop_gradient = False
......@@ -222,10 +223,8 @@ class TestDygraphTripleGrad(TestCase):
np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
def test_all_cases(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.func_exception()
self.func_example_with_gradient_and_create_graph()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
class TestDygraphTripleGradBradcastCase(TestCase):
......@@ -259,6 +258,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
@dygraph_guard
def func_example_with_gradient_and_create_graph(self):
x = random_var(self.x_shape)
x.retain_grads()
x_np = x.numpy()
x.stop_gradient = False
......@@ -316,9 +316,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
def test_all_cases(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.func_example_with_gradient_and_create_graph()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
# d_ddout is none, dtype is float32
......
......@@ -72,7 +72,6 @@ class TestVariable(unittest.TestCase):
np.testing.assert_array_equal(res1.numpy(), res2.numpy())
def test_trace_backward(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with fluid.dygraph.guard():
a = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
b = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
......@@ -80,8 +79,11 @@ class TestVariable(unittest.TestCase):
y = fluid.dygraph.to_variable(b)
x.stop_gradient = False
y.stop_gradient = False
x.retain_grads()
y.retain_grads()
loss = _legacy_C_ops.elementwise_mul(x, y)
loss.retain_grads()
loss.backward()
x_grad = x.gradient()
......@@ -89,7 +91,6 @@ class TestVariable(unittest.TestCase):
np.testing.assert_array_equal(x_grad, loss.gradient() * b)
np.testing.assert_array_equal(y_grad, loss.gradient() * a)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
if __name__ == '__main__':
......
......@@ -505,17 +505,18 @@ class TestReshapeZeroTensor(unittest.TestCase):
class TestReshapeAPI_ZeroDim(unittest.TestCase):
def test_dygraph(self):
paddle.disable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
x = paddle.rand([])
x.stop_gradient = False
out = paddle.reshape(x, [1])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1])
self.assertEqual(out.grad.shape, [1])
out = paddle.reshape(x, [-1, 1])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1, 1])
......@@ -524,6 +525,7 @@ class TestReshapeAPI_ZeroDim(unittest.TestCase):
x = paddle.rand([1])
x.stop_gradient = False
out = paddle.reshape(x, [])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1])
self.assertEqual(out.shape, [])
......
......@@ -895,7 +895,6 @@ class TestSliceDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.slice_wrapper, [data], out, x_init=[data_arr], place=place
)
......@@ -931,7 +930,6 @@ class TestSliceTripleGradCheck(unittest.TestCase):
gradient_checker.triple_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.slice_wrapper, [data], out, x_init=[data_arr], place=place
)
......
......@@ -43,7 +43,6 @@ class TestSparseElementWiseAPI(unittest.TestCase):
"""
def setUp(self):
paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
np.random.seed(2022)
self.op_list = op_list
self.csr_shape = [128, 256]
......@@ -109,7 +108,9 @@ class TestSparseElementWiseAPI(unittest.TestCase):
y, dtype=dtype, stop_gradient=False
)
coo_x = s_dense_x.to_sparse_coo(sparse_dim)
coo_x.retain_grads()
coo_y = s_dense_y.to_sparse_coo(sparse_dim)
coo_y.retain_grads()
actual_res = get_actual_res(coo_x, coo_y, op)
actual_res.backward(actual_res)
......@@ -157,9 +158,12 @@ class TestSparseElementWiseAPI(unittest.TestCase):
sp_a = sparse.sparse_coo_tensor(
indices_data, values1_data, shape, stop_gradient=False
)
sp_a.retain_grads()
sp_b = sparse.sparse_coo_tensor(
indices_data, values2_data, shape, stop_gradient=False
)
sp_b.retain_grads()
values1 = paddle.to_tensor(values1_data, stop_gradient=False)
values2 = paddle.to_tensor(values2_data, stop_gradient=False)
......@@ -185,6 +189,7 @@ class TestSparseElementWiseAPI(unittest.TestCase):
sp_a = sparse.sparse_coo_tensor(
indices_data, values_data, shape, stop_gradient=False
)
sp_a.retain_grads()
bias_values = [1.0, 2.0]
......
......@@ -17,7 +17,6 @@ import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
devices = ['cpu', 'gpu']
......@@ -148,7 +147,6 @@ class TestSparseConvert(unittest.TestCase):
assert np.array_equal(dense_x.grad.numpy(), out_grad.to_dense().numpy())
def test_coo_to_dense(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
values = [1.0, 2.0, 3.0, 4.0, 5.0]
indices_dtypes = ['int32', 'int64']
......@@ -159,6 +157,7 @@ class TestSparseConvert(unittest.TestCase):
shape=[3, 4],
stop_gradient=False,
)
sparse_x.retain_grads()
dense_tensor = sparse_x.to_dense()
# test to_dense_grad backward
out_grad = [
......@@ -180,12 +179,12 @@ class TestSparseConvert(unittest.TestCase):
shape=[3, 4],
stop_gradient=False,
)
sparse_x_cpu.retain_grads()
dense_tensor_cpu = sparse_x_cpu.to_dense()
dense_tensor_cpu.backward(paddle.to_tensor(out_grad))
assert np.array_equal(
correct_x_grad, sparse_x_cpu.grad.values().numpy()
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_to_sparse_csr(self):
x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]]
......@@ -202,7 +201,6 @@ class TestSparseConvert(unittest.TestCase):
assert np.array_equal(dense_tensor.numpy(), x)
def test_coo_values_grad(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
values = [1.0, 2.0, 3.0, 4.0, 5.0]
sparse_x = paddle.sparse.sparse_coo_tensor(
......@@ -211,6 +209,7 @@ class TestSparseConvert(unittest.TestCase):
shape=[3, 4],
stop_gradient=False,
)
sparse_x.retain_grads()
values_tensor = sparse_x.values()
out_grad = [2.0, 3.0, 5.0, 8.0, 9.0]
# test coo_values_grad
......@@ -230,6 +229,7 @@ class TestSparseConvert(unittest.TestCase):
shape=[3, 4, 2],
stop_gradient=False,
)
sparse_x.retain_grads()
values_tensor = sparse_x.values()
out_grad = [
[2.0, 2.0],
......@@ -241,7 +241,6 @@ class TestSparseConvert(unittest.TestCase):
# test coo_values_grad
values_tensor.backward(paddle.to_tensor(out_grad))
assert np.array_equal(out_grad, sparse_x.grad.values().numpy())
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_sparse_coo_tensor_grad(self):
for device in devices:
......
......@@ -22,7 +22,6 @@ import paddle.fluid as fluid
class TensorFillDiagonal_Test(unittest.TestCase):
def test_dim2_normal(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype(
'float32'
)
......@@ -44,6 +43,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
x = paddle.ones((3, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.retain_grads()
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
......@@ -55,10 +55,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
(y.grad.numpy().astype('float32') == expected_grad).all(),
True,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_offset(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
expected_np = np.array([[2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype(
'float32'
)
......@@ -80,6 +78,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
x = paddle.ones((3, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.retain_grads()
y.fill_diagonal_(1, offset=2, wrap=True)
loss = y.sum()
loss.backward()
......@@ -91,7 +90,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
(y.grad.numpy().astype('float32') == expected_grad).all(),
True,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_bool(self):
expected_np = np.array(
......@@ -116,7 +114,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
self.assertEqual((x.numpy() == expected_np).all(), True)
def test_dim2_unnormal_wrap(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
expected_np = np.array(
[
[1, 2, 2],
......@@ -154,6 +151,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
x = paddle.ones((7, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.retain_grads()
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
......@@ -165,10 +163,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
(y.grad.numpy().astype('float32') == expected_grad).all(),
True,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_dim2_unnormal_unwrap(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
expected_np = np.array(
[
[1, 2, 2],
......@@ -206,6 +202,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
x = paddle.ones((7, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.retain_grads()
y.fill_diagonal_(1, offset=0, wrap=False)
loss = y.sum()
loss.backward()
......@@ -217,10 +214,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
(y.grad.numpy().astype('float32') == expected_grad).all(),
True,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_dim_larger2_normal(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
expected_np = np.array(
[
[[1, 2, 2], [2, 2, 2], [2, 2, 2]],
......@@ -250,6 +245,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
x = paddle.ones((3, 3, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.retain_grads()
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
......@@ -261,7 +257,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
(y.grad.numpy().astype('float32') == expected_grad).all(),
True,
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
if __name__ == '__main__':
......
......@@ -286,7 +286,6 @@ class TestTileDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.tile_wrapper, [data], out, x_init=[data_arr], place=place
)
......@@ -318,7 +317,6 @@ class TestTileTripleGradCheck(unittest.TestCase):
gradient_checker.triple_grad_check(
[data], out, x_init=[data_arr], place=place, eps=eps
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.tile_wrapper, [data], out, x_init=[data_arr], place=place
)
......@@ -335,24 +333,26 @@ class TestTileTripleGradCheck(unittest.TestCase):
class TestTileAPI_ZeroDim(unittest.TestCase):
def test_dygraph(self):
paddle.disable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
x = paddle.rand([])
x.stop_gradient = False
out = paddle.tile(x, [])
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.grad.shape, [])
out = paddle.tile(x, [3])
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [3])
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.grad.shape, [3])
out = paddle.tile(x, [2, 3])
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [2, 3])
self.assertEqual(x.grad.shape, [])
......
......@@ -21,8 +21,6 @@ import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
unary_api_list = [
paddle.nn.functional.elu,
paddle.nn.functional.gelu,
......@@ -102,7 +100,9 @@ class TestUnaryAPI(unittest.TestCase):
for api in unary_api_list:
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = api(x)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
......@@ -202,7 +202,9 @@ class TestReduceAPI(unittest.TestCase):
else:
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = api(x, None)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
......@@ -291,12 +293,16 @@ class TestBinaryAPI(unittest.TestCase):
y = paddle.rand([])
x.stop_gradient = False
y.stop_gradient = False
x.retain_grads()
y.retain_grads()
if isinstance(api, dict):
out = api['func'](x, y)
out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
else:
out = api(x, y)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
......@@ -312,12 +318,16 @@ class TestBinaryAPI(unittest.TestCase):
y = paddle.rand([])
x.stop_gradient = False
y.stop_gradient = False
x.retain_grads()
y.retain_grads()
if isinstance(api, dict):
out = api['func'](x, y)
out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
else:
out = api(x, y)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [2, 3, 4])
......@@ -331,6 +341,8 @@ class TestBinaryAPI(unittest.TestCase):
# 3) x is 0D , y is ND
x = paddle.rand([])
y = paddle.rand([2, 3, 4])
x.retain_grads()
y.retain_grads()
x.stop_gradient = False
y.stop_gradient = False
if isinstance(api, dict):
......@@ -339,6 +351,8 @@ class TestBinaryAPI(unittest.TestCase):
np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
else:
out = api(x, y)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
......@@ -352,9 +366,11 @@ class TestBinaryAPI(unittest.TestCase):
# 4) x is 0D , y is scalar
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
y = 0.5
if isinstance(api, dict):
out = getattr(paddle.Tensor, api['cls_method'])(x, y)
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
......@@ -528,7 +544,9 @@ class TestSundryAPI(unittest.TestCase):
def test_flip(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = paddle.flip(x, axis=[])
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
self.assertEqual(out.shape, [])
......@@ -618,7 +636,9 @@ class TestSundryAPI(unittest.TestCase):
def test_pow_factor(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = paddle.pow(x, 2.0)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -628,7 +648,9 @@ class TestSundryAPI(unittest.TestCase):
def test_cast(self):
x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False
x.retain_grads()
out = paddle.cast(x, 'int32')
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -638,7 +660,9 @@ class TestSundryAPI(unittest.TestCase):
def test_cumprod(self):
x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False
x.retain_grads()
out = paddle.cumprod(x, 0)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -651,7 +675,9 @@ class TestSundryAPI(unittest.TestCase):
def test_clip(self):
x = paddle.uniform([], None, -10, 10)
x.stop_gradient = False
x.retain_grads()
out = paddle.clip(x, -5, 5)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -661,7 +687,9 @@ class TestSundryAPI(unittest.TestCase):
def test_increment(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = paddle.increment(x, 1.0)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -694,8 +722,10 @@ class TestSundryAPI(unittest.TestCase):
def test_gather_1D(self):
x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
x.retain_grads()
index = paddle.full([], 2, 'int64')
out = paddle.gather(x, index)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -707,8 +737,10 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.to_tensor(
[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
)
x.retain_grads()
index = paddle.full([], 1, 'int64')
out = paddle.gather(x, index)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [3])
......@@ -720,8 +752,10 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.to_tensor(
[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
)
x.retain_grads()
index = paddle.full([], 1, 'int64')
out = paddle.gather(x, index, axis=1)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [2])
......@@ -731,9 +765,11 @@ class TestSundryAPI(unittest.TestCase):
def test_scatter_1D(self):
x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
x.retain_grads()
index = paddle.full([], 2, 'int64')
updates = paddle.full([], 4.0)
out = paddle.scatter(x, index, updates)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [5])
......@@ -747,6 +783,7 @@ class TestSundryAPI(unittest.TestCase):
index = paddle.full([], 1, 'int64')
updates = paddle.to_tensor([1.0, 2.0, 3.0])
out = paddle.scatter(x, index, updates)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [2, 3])
......@@ -762,10 +799,18 @@ class TestSundryAPI(unittest.TestCase):
x2.stop_gradient = False
x3.stop_gradient = False
x1.retain_grads()
x2.retain_grads()
x3.retain_grads()
out1 = paddle.diagflat(x1, 1)
out2 = paddle.diagflat(x2, -1)
out3 = paddle.diagflat(x3, 0)
out1.retain_grads()
out2.retain_grads()
out3.retain_grads()
out1.backward()
out2.backward()
out3.backward()
......@@ -800,8 +845,11 @@ class TestSundryAPI(unittest.TestCase):
def test_scatter_nd(self):
index = paddle.to_tensor([3], dtype="int64")
updates = paddle.full([], 2, dtype='float32')
updates.retain_grads()
updates.stop_gradient = False
out = paddle.scatter_nd(index, updates, [5])
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [5])
......@@ -818,6 +866,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.randn(())
x.stop_gradient = False
x.retain_grads()
out = paddle.kthvalue(x, 1)
out[0].backward()
......@@ -838,6 +887,7 @@ class TestSundryAPI(unittest.TestCase):
paddle.set_device(place)
x = paddle.randn(())
x.retain_grads()
x.stop_gradient = False
out = paddle.mode(x)
......@@ -854,11 +904,13 @@ class TestSundryAPI(unittest.TestCase):
def test_flatten(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
start_axis = 0
stop_axis = -1
out = paddle.flatten(x, start_axis=start_axis, stop_axis=stop_axis)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [1])
......@@ -868,7 +920,9 @@ class TestSundryAPI(unittest.TestCase):
def test_scale(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = paddle.scale(x, scale=2.0, bias=1.0)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
......@@ -911,6 +965,9 @@ class TestSundryAPI(unittest.TestCase):
out1 = paddle.add_n(x1)
out2 = paddle.add_n([x2, x3])
out1.retain_grads()
out2.retain_grads()
out1.backward()
out2.backward()
......@@ -928,26 +985,31 @@ class TestSundryAPI(unittest.TestCase):
def test_reshape_list(self):
x = paddle.rand([])
x.stop_gradient = False
x.retain_grads()
out = paddle.reshape(x, [])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [])
self.assertEqual(out.grad.shape, [])
out = paddle.reshape(x, [1])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1])
self.assertEqual(out.grad.shape, [1])
out = paddle.reshape(x, [-1])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1])
self.assertEqual(out.grad.shape, [1])
out = paddle.reshape(x, [-1, 1])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1, 1])
......@@ -955,9 +1017,11 @@ class TestSundryAPI(unittest.TestCase):
def test_reshape_tensor(self):
x = paddle.rand([1, 1])
x.retain_grads()
x.stop_gradient = False
out = paddle.reshape(x, [])
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [])
......@@ -965,6 +1029,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = paddle.to_tensor([1, 1, 1], "int32")
out = paddle.reshape(x, new_shape)
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1, 1, 1])
......@@ -972,6 +1037,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = paddle.to_tensor([-1], "int32")
out = paddle.reshape(x, new_shape)
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1])
......@@ -979,6 +1045,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")]
out = paddle.reshape(x, new_shape)
out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1, 1])
......@@ -1019,6 +1086,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.rand([])
x.stop_gradient = False
out = paddle.reverse(x, axis=[])
out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
self.assertEqual(out.shape, [])
......@@ -1029,9 +1097,14 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.rand([])
x1.stop_gradient = False
x2.stop_gradient = False
x1.retain_grads()
x2.retain_grads()
out1 = paddle.sort(x1, axis=-1)
out2 = paddle.sort(x2, axis=0)
out1.retain_grads()
out2.retain_grads()
out1.backward()
out2.backward()
......@@ -1051,9 +1124,15 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.rand([])
x1.stop_gradient = False
x2.stop_gradient = False
x1.retain_grads()
x2.retain_grads()
out1 = paddle.argsort(x1, axis=-1)
out2 = paddle.argsort(x2, axis=0)
out1.retain_grads()
out2.retain_grads()
out1.backward()
out2.backward()
......@@ -1075,6 +1154,7 @@ class TestSundryAPI(unittest.TestCase):
w0 = paddle.rand([])
x0.stop_gradient = False
y0.stop_gradient = False
y0.retain_grads()
out0 = paddle.lerp(x0, y0, w0)
out0.backward()
......@@ -1089,6 +1169,8 @@ class TestSundryAPI(unittest.TestCase):
w1 = paddle.rand([])
x1.stop_gradient = False
y1.stop_gradient = False
x1.retain_grads()
y1.retain_grads()
out1 = paddle.lerp(x1, y1, w1)
out1.backward()
......@@ -1103,6 +1185,8 @@ class TestSundryAPI(unittest.TestCase):
w2 = paddle.rand([])
x2.stop_gradient = False
y2.stop_gradient = False
x2.retain_grads()
y2.retain_grads()
out2 = paddle.lerp(x2, y2, w2)
out2.backward()
......@@ -1120,6 +1204,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.randn(())
x.stop_gradient = False
x.retain_grads()
out = paddle.repeat_interleave(x, 2, None)
out.backward()
......@@ -1145,6 +1230,7 @@ class TestSundryAPI(unittest.TestCase):
dtype='float32',
stop_gradient=False,
)
logit.retain_grads()
label = paddle.to_tensor(
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype='float32'
)
......@@ -1153,6 +1239,7 @@ class TestSundryAPI(unittest.TestCase):
out0 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_0)
out1 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_1)
out0.retain_grads()
np.testing.assert_array_equal(
out0.numpy(),
......@@ -1173,7 +1260,10 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.full([], 2)
x1.stop_gradient = False
x2.stop_gradient = False
x1.retain_grads()
x2.retain_grads()
out = paddle.where(x1 > x2, x1, x2)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out.numpy(), 2)
......@@ -1186,9 +1276,12 @@ class TestSundryAPI(unittest.TestCase):
def test_atan2(self):
x1 = paddle.full([], 0)
x2 = paddle.full([], 2)
x1.retain_grads()
x2.retain_grads()
x1.stop_gradient = False
x2.stop_gradient = False
out = paddle.atan2(x1, x2)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out.numpy(), 0)
......